library(caret)
library(pROC)
library(dplyr)
library("fastDummies")
library(glmnet)
library(rpart)
library(e1071)
library(class)
library(rpart)
library(randomForest)
library(datasets)
library(xgboost)
library(DiagrammeR)
df_original <- read.csv('../cleaned_data/cleaning_data-02.csv')Data 2 : Analysis and Modeling
install.packages('vscDebugger')Warning message:
"package 'vscDebugger' is not available for this version of R
A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages"
colnames(df_original)- 'BEM_ID'
- 'P_IDCOR'
- 'H_IDNUMBER'
- 'HHNO'
- 'PERSNO'
- 'RELHEAD'
- 'A03'
- 'A07'
- 'A08'
- 'A09'
- 'A10A'
- 'A11'
- 'A11Y'
- 'A12'
- 'A13'
- 'A14'
- 'A15V1'
- 'A15V4'
- 'D1A_6AF'
- 'D1A_7MF'
- 'D1A_7YF'
- 'D1A_8F'
- 'D1A_9F'
- 'D1A_10CF'
- 'D1A_1V1F'
- 'D1A_1V3F'
- 'D1A_1V4F'
- 'D1A_1V7F'
- 'D1A_1V8F'
- 'D1A_1V9F'
- 'D1A_6AL'
- 'D1A_7ML'
- 'D1A_7YL'
- 'D1A_8L'
- 'D1A_9L'
- 'D1A_10CL'
- 'D1A_1V1L'
- 'D1A_1V3L'
- 'D1A_1V4L'
- 'D1A_1V7L'
- 'D1A_1V8L'
- 'D1A_1V9L'
- 'D1A_4'
- 'D1A_8YF'
- 'D1A_8YL'
- 'D1B_6AF'
- 'D1B_7MF'
- 'D1B_7YF'
- 'D1B_8F'
- 'D1B_9F'
- 'D1B_1V1F'
- 'D1B_1V4F'
- 'D1B_1V6F'
- 'D1B_1V7F'
- 'D1B_1V8F'
- 'D1B_1V9F'
- 'D1B_6AL'
- 'D1B_7ML'
- 'D1B_7YL'
- 'D1B_8L'
- 'D1B_9L'
- 'D1B_1V1L'
- 'D1B_1V4L'
- 'D1B_1V6L'
- 'D1B_1V7L'
- 'D1B_1V8L'
- 'D1B_1V9L'
- 'D1B_4'
- 'D1C_6AF'
- 'D1C_7MF'
- 'D1C_7YF'
- 'D1C_8F'
- 'D1C_9F'
- 'D1C_10CF'
- 'D1C_1V1F'
- 'D1C_1V3F'
- 'D1C_1V4F'
- 'D1C_1V5F'
- 'D1C_1V6F'
- 'D1C_1V7F'
- 'D1C_1V8F'
- 'D1C_1V9F'
- 'D1C_6AL'
- 'D1C_7ML'
- 'D1C_7YL'
- 'D1C_8L'
- 'D1C_9L'
- 'D1C_10CL'
- 'D1C_1V1L'
- 'D1C_1V3L'
- 'D1C_1V4L'
- 'D1C_1V5L'
- 'D1C_1V6L'
- 'D1C_1V7L'
- 'D1C_1V8L'
- 'D1C_1V9L'
- 'D1C_1V10L'
- 'D1C_4'
- 'N1_1V1'
- 'N1_2'
- 'N1_3'
- 'N1_6U'
- 'N1_6TAKA'
- 'N1_7'
- 'N1_8'
- 'N1_9'
- 'N1_10'
- 'N1_11'
- 'N1_12'
- 'N1_13'
- 'N1_14'
- 'N1_15V1'
- 'N1_16'
- 'N1_17'
- 'O1_1'
- 'O1_2'
- 'O1_3'
- 'O1_4V1'
- 'O1_5'
- 'O1_6'
- 'G1_2S1'
- 'G1_3S1'
- 'G1_4S1'
- 'G1_5A1S1'
- 'G1_6S1'
- 'G1_2S2'
- 'G1_3S2'
- 'G1_4S2'
- 'G1_5A1S2'
- 'G1_6S2'
- 'G1_2S3'
- 'G1_3S3'
- 'G1_4S3'
- 'G1_5A1S3'
- 'G1_6S3'
- 'G1_2SIL1'
- 'G1_3SIL1'
- 'G1_4SIL1'
- 'G1_5A1SIL1'
- 'G1_6SIL1'
- 'G2_2S1'
- 'G2_3S1'
- 'G2_4S1'
- 'G2_5A1S1'
- 'G2_5B1S1'
- 'G2_6S1'
- 'G2_7S1'
- 'DISTRICT'
- 'D1A_10AF_3M'
- 'D1A_10AL_3M'
- 'D1C_10AF_3M'
- 'D1C_10AL_3M'
- 'A05'
- 'MOUZA_GRP'
dim(df_original)- 23506
- 154
Out of all the columns, based on Dorato’s paper, and after going into a deep dive of what information each column was showcasing, we decided to specifically choose the columns below which were in synchronisation with the objective of the paper and in terms of the base logistic’s significant variables list.
Base Model: Logistic Regression
# subsetting the data
df1 <-df_original[, c("D1A_1V1L", "A08", "A12", "A13", "A14", "D1A_7ML", "A15V1", "A15V4", "D1A_4", "A11Y" ,"N1_6TAKA", "N1_12", "N1_13", "N1_14", "N1_16", "N1_17", "D1A_10AF_3M", "D1A_10AL_3M", "D1A_8F")]
df1$Age <- 2019 - df1$A08
df1 <-df1[, c("D1A_1V1L", "A12", "A13", "A14", "D1A_7ML", "A15V1", "A15V4", "D1A_4", "A11Y" ,"N1_6TAKA", "N1_12", "N1_13", "N1_14", "N1_16", "N1_17", "D1A_10AF_3M", "D1A_10AL_3M", "D1A_8F")]
# taking out D1A_6AL because too many destinations in list
df1 <-df1[, c("D1A_1V1L", "A12", "A13", "A14", "D1A_7ML", "A15V1", "A15V4", "D1A_4", "A11Y" ,"N1_6TAKA", "N1_12", "N1_13", "N1_14", "N1_16", "N1_17", "D1A_10AF_3M", "D1A_10AL_3M", "D1A_8F")]
df1 <- df1 %>% rename( "Work_Earn_Money" = D1A_1V1L,
"Can_write_letter" = A12,
"Education_Level" = A13,
"Livelihood_Occupation" = A14,
"Month_Arrival" = D1A_7ML,
"Migraton_Experience_Internal" = A15V1,
"No_Migration_Experience" = A15V4,
"Number_Trips" = D1A_4,
"Age_First_Marriage" = A11Y,
"Paid_in_Taka" = N1_6TAKA,
"Rent_per_Month" = N1_12,
"Food_budget" = N1_13,
"Monthly_Remittances" = N1_14,
"Monthly_Savings" = N1_16,
"Saving_brought_Home" = N1_17,
"Wage_First_Head" = D1A_10AF_3M,
"Wage_Last_Head"= D1A_10AL_3M,
"Duration_of_stay" = D1A_8F)
#converting cateogorical variables to factor
#columns_to_factor <- c("D1A_1V1L", "A12", "A13", "A14", "D1A_7ML", "A15V1", "A15V4")
columns_to_factor <- c("Work_Earn_Money", "Can_write_letter", "Education_Level", "Livelihood_Occupation", "Month_Arrival", "Migraton_Experience_Internal", "No_Migration_Experience")
df1[columns_to_factor] <- lapply(df1[columns_to_factor], factor)
print(head(df1)) Work_Earn_Money Can_write_letter Education_Level Livelihood_Occupation
1 <NA> 1 4 14
2 <NA> <NA> 2 <NA>
3 <NA> <NA> <NA> <NA>
4 <NA> 2 2 17
5 <NA> 1 4 10
6 <NA> 2 4 17
Month_Arrival Migraton_Experience_Internal No_Migration_Experience
1 <NA> <NA> 4
2 <NA> <NA> 4
3 <NA> <NA> 4
4 <NA> <NA> 4
5 <NA> <NA> <NA>
6 <NA> 1 <NA>
Number_Trips Age_First_Marriage Paid_in_Taka Rent_per_Month Food_budget
1 NA NA NA NA NA
2 NA NA NA NA NA
3 NA NA NA NA NA
4 NA 13 NA NA NA
5 NA 19 NA NA NA
6 NA 18 NA NA NA
Monthly_Remittances Monthly_Savings Saving_brought_Home Wage_First_Head
1 NA NA NA NA
2 NA NA NA NA
3 NA NA NA NA
4 NA NA NA NA
5 NA NA NA NA
6 NA NA NA NA
Wage_Last_Head Duration_of_stay
1 NA NA
2 NA NA
3 NA NA
4 NA NA
5 NA NA
6 NA NA
# converting variables to dummy variables
df1 <- fastDummies::dummy_cols(df1, remove_first_dummy = TRUE, remove_selected_columns = TRUE)
print(dim(df1))
print(head(df1))[1] 23506 59
Number_Trips Age_First_Marriage Paid_in_Taka Rent_per_Month Food_budget
1 NA NA NA NA NA
2 NA NA NA NA NA
3 NA NA NA NA NA
4 NA 13 NA NA NA
5 NA 19 NA NA NA
6 NA 18 NA NA NA
Monthly_Remittances Monthly_Savings Saving_brought_Home Wage_First_Head
1 NA NA NA NA
2 NA NA NA NA
3 NA NA NA NA
4 NA NA NA NA
5 NA NA NA NA
6 NA NA NA NA
Wage_Last_Head Duration_of_stay Work_Earn_Money_1 Work_Earn_Money_NA
1 NA NA NA 1
2 NA NA NA 1
3 NA NA NA 1
4 NA NA NA 1
5 NA NA NA 1
6 NA NA NA 1
Can_write_letter_2 Can_write_letter_NA Education_Level_2 Education_Level_3
1 0 0 0 0
2 NA 1 1 0
3 NA 1 NA NA
4 1 0 1 0
5 0 0 0 0
6 1 0 0 0
Education_Level_4 Education_Level_5 Education_Level_6 Education_Level_7
1 1 0 0 0
2 0 0 0 0
3 NA NA NA NA
4 0 0 0 0
5 1 0 0 0
6 1 0 0 0
Education_Level_8 Education_Level_9 Education_Level_NA
1 0 0 0
2 0 0 0
3 NA NA 1
4 0 0 0
5 0 0 0
6 0 0 0
Livelihood_Occupation_2 Livelihood_Occupation_3 Livelihood_Occupation_4
1 0 0 0
2 NA NA NA
3 NA NA NA
4 0 0 0
5 0 0 0
6 0 0 0
Livelihood_Occupation_5 Livelihood_Occupation_6 Livelihood_Occupation_7
1 0 0 0
2 NA NA NA
3 NA NA NA
4 0 0 0
5 0 0 0
6 0 0 0
Livelihood_Occupation_8 Livelihood_Occupation_9 Livelihood_Occupation_10
1 0 0 0
2 NA NA NA
3 NA NA NA
4 0 0 0
5 0 0 1
6 0 0 0
Livelihood_Occupation_11 Livelihood_Occupation_12 Livelihood_Occupation_13
1 0 0 0
2 NA NA NA
3 NA NA NA
4 0 0 0
5 0 0 0
6 0 0 0
Livelihood_Occupation_14 Livelihood_Occupation_15 Livelihood_Occupation_16
1 1 0 0
2 NA NA NA
3 NA NA NA
4 0 0 0
5 0 0 0
6 0 0 0
Livelihood_Occupation_17 Livelihood_Occupation_18 Livelihood_Occupation_19
1 0 0 0
2 NA NA NA
3 NA NA NA
4 1 0 0
5 0 0 0
6 1 0 0
Livelihood_Occupation_99 Livelihood_Occupation_NA Month_Arrival_2
1 0 0 NA
2 NA 1 NA
3 NA 1 NA
4 0 0 NA
5 0 0 NA
6 0 0 NA
Month_Arrival_3 Month_Arrival_4 Month_Arrival_5 Month_Arrival_6
1 NA NA NA NA
2 NA NA NA NA
3 NA NA NA NA
4 NA NA NA NA
5 NA NA NA NA
6 NA NA NA NA
Month_Arrival_7 Month_Arrival_8 Month_Arrival_9 Month_Arrival_10
1 NA NA NA NA
2 NA NA NA NA
3 NA NA NA NA
4 NA NA NA NA
5 NA NA NA NA
6 NA NA NA NA
Month_Arrival_11 Month_Arrival_12 Month_Arrival_98 Month_Arrival_NA
1 NA NA NA 1
2 NA NA NA 1
3 NA NA NA 1
4 NA NA NA 1
5 NA NA NA 1
6 NA NA NA 1
Migraton_Experience_Internal_NA No_Migration_Experience_NA
1 1 0
2 1 0
3 1 0
4 1 0
5 1 1
6 0 1
# removing rows that do not have D1A_1V1L
df1 <- df1[!is.na(df1$Work_Earn_Money_1), ]
#replace NA with 0, 0 meaning "No", assuming that if the participant did not say yes to a question, than it would be no.
df1 <- replace(df1, is.na(df1), 0)
print(head(df1)) Number_Trips Age_First_Marriage Paid_in_Taka Rent_per_Month Food_budget
11 1 29 13000 0 12000
33 1 21 39800 0 3000
38 4 23 1000 7000 10000
43 1 21 6000 1500 2000
50 1 16 0 0 0
56 1 26 6000 1000 2000
Monthly_Remittances Monthly_Savings Saving_brought_Home Wage_First_Head
11 0 1000 0 18000
33 0 300 0 20000
38 10000 2000 30000 0
43 2000 500 0 6000
50 0 0 0 0
56 2000 1000 4000 6000
Wage_Last_Head Duration_of_stay Work_Earn_Money_1 Work_Earn_Money_NA
11 18000 997 0 0
33 20000 997 0 0
38 0 156 1 0
43 6000 60 1 0
50 0 84 0 0
56 6000 12 1 0
Can_write_letter_2 Can_write_letter_NA Education_Level_2 Education_Level_3
11 1 0 1 0
33 1 0 0 0
38 1 0 1 0
43 1 0 1 0
50 1 0 0 0
56 0 0 0 1
Education_Level_4 Education_Level_5 Education_Level_6 Education_Level_7
11 0 0 0 0
33 0 0 0 0
38 0 0 0 0
43 0 0 0 0
50 0 0 0 0
56 0 0 0 0
Education_Level_8 Education_Level_9 Education_Level_NA
11 0 0 0
33 0 0 0
38 0 0 0
43 0 0 0
50 0 0 0
56 0 0 0
Livelihood_Occupation_2 Livelihood_Occupation_3 Livelihood_Occupation_4
11 1 0 0
33 1 0 0
38 0 0 0
43 0 0 1
50 0 1 0
56 0 0 1
Livelihood_Occupation_5 Livelihood_Occupation_6 Livelihood_Occupation_7
11 0 0 0
33 0 0 0
38 0 0 0
43 0 0 0
50 0 0 0
56 0 0 0
Livelihood_Occupation_8 Livelihood_Occupation_9 Livelihood_Occupation_10
11 0 0 0
33 0 0 0
38 0 0 0
43 0 0 0
50 0 0 0
56 0 0 0
Livelihood_Occupation_11 Livelihood_Occupation_12 Livelihood_Occupation_13
11 0 0 0
33 0 0 0
38 0 0 1
43 0 0 0
50 0 0 0
56 0 0 0
Livelihood_Occupation_14 Livelihood_Occupation_15 Livelihood_Occupation_16
11 0 0 0
33 0 0 0
38 0 0 0
43 0 0 0
50 0 0 0
56 0 0 0
Livelihood_Occupation_17 Livelihood_Occupation_18 Livelihood_Occupation_19
11 0 0 0
33 0 0 0
38 0 0 0
43 0 0 0
50 0 0 0
56 0 0 0
Livelihood_Occupation_99 Livelihood_Occupation_NA Month_Arrival_2
11 0 0 0
33 0 0 0
38 0 0 0
43 0 0 0
50 0 0 0
56 0 0 0
Month_Arrival_3 Month_Arrival_4 Month_Arrival_5 Month_Arrival_6
11 0 0 0 0
33 0 0 0 0
38 0 0 0 0
43 0 0 0 0
50 0 0 0 0
56 0 0 0 0
Month_Arrival_7 Month_Arrival_8 Month_Arrival_9 Month_Arrival_10
11 0 0 0 0
33 0 0 0 0
38 0 0 0 0
43 0 0 0 0
50 0 0 0 0
56 0 0 0 0
Month_Arrival_11 Month_Arrival_12 Month_Arrival_98 Month_Arrival_NA
11 0 0 1 0
33 0 0 1 0
38 0 0 0 0
43 0 0 1 0
50 0 0 1 0
56 0 0 1 0
Migraton_Experience_Internal_NA No_Migration_Experience_NA
11 0 1
33 0 1
38 0 1
43 0 1
50 0 1
56 0 1
# partitioning the data
set.seed(123) # for reproducibility
#train_idx <- createDataPartition(df1$D1A_1V1L, p = 0.8, list = FALSE)
train_idx <- createDataPartition(df1$Work_Earn_Money_1, p = 0.8, list = FALSE)
train <- df1[train_idx,]
test <- df1[-train_idx,] #trying a logistic regression
model <- glm(Work_Earn_Money_1 ~ ., data = df1, family = binomial(link='logit'))
# look at summary of logistic regression model
(summary2_lm <- summary(model))
Call:
glm(formula = Work_Earn_Money_1 ~ ., family = binomial(link = "logit"),
data = df1)
Coefficients: (6 not defined because of singularities)
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.108e+01 6.132e+02 -0.018 0.985581
Number_Trips 2.102e-01 8.233e-02 2.553 0.010670 *
Age_First_Marriage -1.188e-02 1.574e-02 -0.755 0.450284
Paid_in_Taka 4.180e-06 2.971e-06 1.407 0.159556
Rent_per_Month 1.868e-04 6.995e-05 2.670 0.007590 **
Food_budget -7.234e-06 3.027e-05 -0.239 0.811126
Monthly_Remittances 3.018e-04 4.956e-05 6.089 1.13e-09 ***
Monthly_Savings 6.897e-05 4.061e-05 1.698 0.089426 .
Saving_brought_Home 1.342e-06 3.328e-06 0.403 0.686694
Wage_First_Head -5.990e-05 3.420e-05 -1.751 0.079872 .
Wage_Last_Head 1.371e-04 3.409e-05 4.021 5.80e-05 ***
Duration_of_stay -1.485e-03 2.789e-04 -5.326 1.01e-07 ***
Work_Earn_Money_NA NA NA NA NA
Can_write_letter_2 3.758e-01 3.685e-01 1.020 0.307896
Can_write_letter_NA NA NA NA NA
Education_Level_2 -5.993e-01 3.622e-01 -1.655 0.097935 .
Education_Level_3 -1.055e+00 4.906e-01 -2.150 0.031541 *
Education_Level_4 -8.061e-01 4.968e-01 -1.622 0.104709
Education_Level_5 -1.523e+00 5.347e-01 -2.849 0.004385 **
Education_Level_6 -1.947e+00 6.565e-01 -2.965 0.003025 **
Education_Level_7 -1.826e+00 5.520e-01 -3.309 0.000936 ***
Education_Level_8 -2.470e+00 5.359e-01 -4.609 4.04e-06 ***
Education_Level_9 1.081e+01 8.827e+02 0.012 0.990231
Education_Level_NA NA NA NA NA
Livelihood_Occupation_2 1.045e+00 6.869e-01 1.521 0.128282
Livelihood_Occupation_3 1.020e+00 8.858e-01 1.152 0.249324
Livelihood_Occupation_4 1.825e+00 1.222e+00 1.493 0.135354
Livelihood_Occupation_5 3.996e-02 9.724e-01 0.041 0.967220
Livelihood_Occupation_6 -1.519e+00 1.494e+00 -1.017 0.308988
Livelihood_Occupation_7 1.500e+00 7.491e-01 2.003 0.045199 *
Livelihood_Occupation_8 -1.623e+00 9.062e-01 -1.792 0.073212 .
Livelihood_Occupation_9 2.427e+00 8.715e-01 2.785 0.005354 **
Livelihood_Occupation_10 9.955e-01 6.808e-01 1.462 0.143667
Livelihood_Occupation_11 7.487e-01 6.764e-01 1.107 0.268302
Livelihood_Occupation_12 -3.007e-01 7.075e-01 -0.425 0.670858
Livelihood_Occupation_13 8.573e-01 6.629e-01 1.293 0.195943
Livelihood_Occupation_14 6.300e-01 1.027e+00 0.613 0.539726
Livelihood_Occupation_15 1.257e+00 7.156e-01 1.756 0.079036 .
Livelihood_Occupation_16 1.174e+00 9.175e-01 1.280 0.200666
Livelihood_Occupation_17 -2.769e+00 6.844e-01 -4.046 5.21e-05 ***
Livelihood_Occupation_18 8.129e-01 8.976e-01 0.906 0.365155
Livelihood_Occupation_19 6.192e-01 8.443e-01 0.733 0.463344
Livelihood_Occupation_99 NA NA NA NA
Livelihood_Occupation_NA NA NA NA NA
Month_Arrival_2 -9.650e-02 5.036e-01 -0.192 0.848045
Month_Arrival_3 -8.094e-01 5.129e-01 -1.578 0.114536
Month_Arrival_4 -3.629e-01 6.851e-01 -0.530 0.596311
Month_Arrival_5 -1.054e+00 5.900e-01 -1.787 0.073955 .
Month_Arrival_6 -1.516e-01 5.567e-01 -0.272 0.785452
Month_Arrival_7 6.641e-01 7.465e-01 0.890 0.373677
Month_Arrival_8 1.228e-01 7.708e-01 0.159 0.873387
Month_Arrival_9 1.004e+00 7.811e-01 1.286 0.198474
Month_Arrival_10 -1.708e+00 8.719e-01 -1.959 0.050060 .
Month_Arrival_11 2.070e-01 8.759e-01 0.236 0.813144
Month_Arrival_12 -5.850e-01 7.528e-01 -0.777 0.437093
Month_Arrival_98 -5.213e-01 3.736e-01 -1.395 0.162918
Month_Arrival_NA NA NA NA NA
Migraton_Experience_Internal_NA 2.354e+01 1.075e+03 0.022 0.982529
No_Migration_Experience_NA 1.308e+01 6.132e+02 0.021 0.982988
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1745.4 on 1999 degrees of freedom
Residual deviance: 1028.3 on 1947 degrees of freedom
AIC: 1134.3
Number of Fisher Scoring iterations: 13
Though we focused on subsetting the data in the inital go by looking at the data description, we used logistic regression on the subsetted data to find the significant variables in comparison and used those.
# select the features that are significant for the baseline model and other models
df_model <- df1[ , c('Work_Earn_Money_1', 'Number_Trips', 'Rent_per_Month', 'Monthly_Remittances', 'Wage_Last_Head', 'Duration_of_stay', 'Education_Level_3', 'Education_Level_5', 'Education_Level_6', 'Education_Level_7', 'Education_Level_8', 'Livelihood_Occupation_7', 'Livelihood_Occupation_9', 'Livelihood_Occupation_17')]
set.seed(123) # for reproducibility
train_idx <- createDataPartition(df_model$Work_Earn_Money_1, p = 0.8, list = FALSE)
train <- df_model[train_idx,]
test <- df_model[-train_idx,]# Create model with predictors found from linear model
#model2 <- glm(Work_Earn_Money_1 ~ D1A_4 + N1_12 + N1_14 + D1A_10AL_3M + D1A_8F + A13_3 + A13_5 +
#A13_6 + A13_7 + A13_8 + A14_7 + A14_9 + A14_17, data = train, family = binomial)
# Create model with predictors found from linear model
model2 <- glm(Work_Earn_Money_1 ~ ., data = train, family = binomial(link='logit'))
#model2 <- glm(Work_Earn_Money_1 ~ D1A_4 + N1_12 + N1_14 + D1A_10AL_3M + D1A_8F + A13_3 + A13_5 +
#A13_6 + A13_7 + A13_8 + A14_7 + A14_9 + A14_17, data = train, family = binomial)
# predictions on test data
predictions <- predict(model2, newdata = test, type = "response")
# convert probabilities to class labels, assign greater than 0.5 to Positive
pred_classes <- ifelse(predictions > 0.5, "Positive", "Negative")
# confusion matrix
cm <- table(Actual = test$Work_Earn_Money_1, Predicted = pred_classes)
# Confusion Matrix
print(cm)
roc_curve_lr <- roc(test$Work_Earn_Money_1, predictions, legacy.axes = FALSE)
# Plot ROC curve
plot(roc_curve_lr, main = "ROC Curve", col = "blue", legacy.axes = T)
# Save AUC
auc_lr <- round(auc(roc_curve_lr), 2) Predicted
Actual Negative Positive
0 34 31
1 10 325
Setting levels: control = 0, case = 1
Setting direction: controls < cases
# Calculate True Positives (TP), True Negatives (TN), False Positives (FP), False Negatives (FN)
TP <- 325
TN <- 34
FP <- 41
FN <- 10
# Calculate accuracy
accuracy <- (TP + TN) / sum(cm)
# Calculate precision
precision <- TP / (TP + FP)
# Calculate recall (also called sensitivity)
recall <- TP / (TP + FN)
# Calculate F1 score
F1 <- 2 * (precision * recall) / (precision + recall)
#Specificity
# Calculate specificity
specificity <- (TN / (TN + FP))
# Calculate AIC
aic <- AIC(model2)
# Calculate BIC
bic <- BIC(model2)
# Print AIC and BIC
print(paste("AIC:", aic))
print(paste("BIC:", bic))
# Print the metrics
cat("Accuracy:", accuracy, "\n")
cat("Precision:", precision, "\n")
cat("Recall:", recall, "\n")
cat("F1 Score:", F1, "\n")
print(paste("Specificity:", specificity))[1] "AIC: 918.394533143904"
[1] "BIC: 993.683157859094"
Accuracy: 0.8975
Precision: 0.8879781
Recall: 0.9701493
F1 Score: 0.9272468
[1] "Specificity: 0.453333333333333"
Feature Selection/Interesting Findings
Final Conclusions
The following are features are predictors of D1A_1V1L: Internal: Primary purpose of trip: work/earn money - Last, Head.
D1A_4 2.102e-01 8.233e-02 2.553 0.010670 - D1A_4: Internal: Total number of trips - Head
N1_12 1.868e-04 6.995e-05 2.670 0.007590 - N1_12: Internal: Rent per month
N1_14 3.018e-04 4.956e-05 6.089 1.13e-09 *** -> N1_14: Internal: Average monthly remittances sent home
D1A_10AL_3M 1.371e-04 3.409e-05 4.021 5.80e-05*** -> D1A_10AL_3M: Internal: Wage(taka)- Last, Head Monthly
D1A_8F -1.485e-03 2.789e-04 -5.326 1.01e-07 *** -> D1A_8F: Internal: Duration of stay - First, Head
A13_3 -1.055e+00 4.906e-01 -2.150 0.031541*** **-> A13: Household: Level of education (Highest level passed) - 3 Class V (Complete PE)
A13_5 -1.523e+00 5.347e-01 -2.849 0.004385** -> A13: Household: Level of education (Highest level passed) - 5 SSC (Complete SE)
A13_6 -1.947e+00 6.565e-01 -2.965 0.003025 -> A13: Household: Level of education (Highest level passed) - 6 College (11 and 12 grades)
A13_7 -1.826e+00 5.520e-01 -3.309 0.000936 -> A13: Household: Level of education (Highest level passed) - 7 HSC (complete HSE)
A13_8 -2.470e+00 5.359e-01 -4.609 4.04e-06 -> A13: Household: Level of education (Highest level passed) - 8 University level
A14_7 1.500e+00 7.491e-01 2.003 0.045199 -> A14: Household: Livelihood/occupation - 7 Rickshaw driver/ Brick breaking/Road building/Construction worker/boatman/earth
A14_9 2.427e+00 8.715e-01 2.785 0.005354 -> 9 Non agricultural worker(factory worker, blue collar service)
A14_17 -2.769e+00 6.844e-01 -4.046 5.21e-05 -> 17 Homemaker
Note about AIC and BIC for Ridge, Lasso and Elastic Net
AIC and BIC are best suited for models where parameters are estimated via likelihood methods, typically in more traditional statistical models like linear and logistic regression models. For models incorporating regularization (like Ridge, Lasso and Elastic Net) or non-parametric models (like Decision Trees and Random Forests), these criteria are generally not applicable unless approximations or modifications to the original criteria are used. Hence, although we decided to use this as a method of comparison, it will lead to biased outcome.
Ridge
# prepare x and y
x <- model.matrix(Work_Earn_Money_1 ~ ., df_model)[, ]
y <- df_model$Work_Earn_Money_1
# split data into train and test sets
set.seed(1)
train <- sample(c(TRUE, FALSE), nrow(df_model), replace = TRUE, prob = c(0.8, 0.2))
test <- !train
y.test <- y[test]
# Ridge regression model
ridge.mod <- glmnet(x[train, ], y[train], alpha = 0, lambda.min.ratio = 0.000001)
# Cross-validation for selecting lambda
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 0, lambda.min.ratio = 0.000001)
bestlam <- cv.out$lambda.min
# Predict the test data with the best lambda
ridge.pred <- predict(ridge.mod, s = bestlam, newx = x[test, ])
# Assuming predictions are probabilities, convert them to binary classes
predicted_classes <- ifelse(ridge.pred > 0.5, 1, 0)
# Confusion matrix
conf_matrix <- table(predicted_classes, y.test)
# Calculate Sensitivity (True Positive Rate)
sensitivity <- conf_matrix[2, 2] / sum(conf_matrix[2, ])
# Calculate Specificity (True Negative Rate)
specificity <- conf_matrix[1, 1] / sum(conf_matrix[1, ])
# Calculate Accuracy
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
# Calculate Precision (Positive Predictive Value)
precision <- conf_matrix[2, 2] / sum(conf_matrix[, 2])
# Calculate Recall (Same as Sensitivity)
recall <- sensitivity
# Calculate F1
F1 <- 2 * (precision * recall) / (precision + recall)
# Calculate False Positive Rate
fpr <- 1 - specificity
# Create ROC Curve
roc_curve_ridge <- roc(y.test, ridge.pred)
# Calculate AUC (Area Under Curve)
auc_value_ridge <- auc(roc_curve_ridge)
roc_curve <- roc(y.test, ridge.pred)
# Calculate AUC (Area Under Curve)
auc_value <- auc(roc_curve)
# Print the metrics
print("Metrics:")
print(paste("Sensitivity (True Positive Rate):", sensitivity))
print(paste("Specificity (True Negative Rate):", specificity))
print(paste("Accuracy:", accuracy))
print(paste("Precision (Positive Predictive Value):", precision))
print(paste("Recall:", recall))
print(paste("False Positive Rate:", fpr))
print(paste("F1:", F1))
print(paste("AUC (Area Under Curve):", auc_value_ridge))
# Plot ROC curve
plot(roc_curve_ridge, main = "ROC Curve", col = "blue")
print(paste("AUC (Area Under Curve):", auc_value))
# Plot ROC curve
plot(roc_curve, main = "ROC Curve", col = "blue")
coef(ridge.mod)Setting levels: control = 0, case = 1
Warning message in roc.default(y.test, ridge.pred):
"Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector."
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Warning message in roc.default(y.test, ridge.pred):
"Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector."
Setting direction: controls < cases
[[ suppressing 84 column names 's0', 's1', 's2' ... ]]
[1] "Metrics:"
[1] "Sensitivity (True Positive Rate): 0.917808219178082"
[1] "Specificity (True Negative Rate): 0.674418604651163"
[1] "Accuracy: 0.892156862745098"
[1] "Precision (Positive Predictive Value): 0.959885386819484"
[1] "Recall: 0.917808219178082"
[1] "False Positive Rate: 0.325581395348837"
[1] "F1: 0.938375350140056"
[1] "AUC (Area Under Curve): 0.886066728182216"
[1] "AUC (Area Under Curve): 0.886066728182216"
15 x 84 sparse Matrix of class "dgCMatrix"
(Intercept) 8.385678e-01 8.382639e-01 8.382188e-01
(Intercept) . . .
Number_Trips 2.324851e-38 5.618277e-05 6.454707e-05
Rent_per_Month 2.308136e-41 5.578948e-08 6.409709e-08
Monthly_Remittances 2.880487e-41 6.968164e-08 8.006785e-08
Wage_Last_Head 7.445401e-42 1.799846e-08 2.067902e-08
Duration_of_stay -9.695762e-41 -2.351791e-07 -2.703411e-07
Education_Level_3 -5.997023e-38 -1.449509e-04 -1.665353e-04
Education_Level_5 3.124446e-39 7.079255e-06 8.052202e-06
Education_Level_6 -1.557239e-37 -3.772600e-04 -4.335859e-04
Education_Level_7 -6.587286e-38 -1.601058e-04 -1.840992e-04
Education_Level_8 -6.543591e-38 -1.594983e-04 -1.834782e-04
Livelihood_Occupation_7 1.248436e-37 3.025307e-04 3.477130e-04
Livelihood_Occupation_9 1.509607e-37 3.655498e-04 4.200978e-04
Livelihood_Occupation_17 -6.955422e-37 -1.684579e-03 -1.936013e-03
(Intercept) 8.381670e-01 8.381075e-01 8.380394e-01
(Intercept) . . .
Number_Trips 7.414820e-05 8.516631e-05 9.780698e-05
Rent_per_Month 7.363373e-08 8.457863e-08 9.713638e-08
Monthly_Remittances 9.199374e-08 1.056849e-07 1.213990e-07
Wage_Last_Head 2.375626e-08 2.728808e-08 3.134058e-08
Duration_of_stay -3.107503e-07 -3.571866e-07 -4.105445e-07
Education_Level_3 -1.913127e-04 -2.197490e-04 -2.523755e-04
Education_Level_5 9.142965e-06 1.036035e-05 1.171166e-05
Education_Level_6 -4.982921e-04 -5.726159e-04 -6.579746e-04
Education_Level_7 -2.116911e-04 -2.434219e-04 -2.799135e-04
Education_Level_8 -2.110791e-04 -2.428528e-04 -2.794364e-04
Livelihood_Occupation_7 3.996219e-04 4.592517e-04 5.277420e-04
Livelihood_Occupation_9 4.827519e-04 5.547061e-04 6.373264e-04
Livelihood_Occupation_17 -2.224830e-03 -2.556542e-03 -2.937459e-03
(Intercept) 8.379613e-01 8.378718e-01 8.377694e-01
(Intercept) . . .
Number_Trips 1.123045e-04 1.289256e-04 1.479730e-04
Rent_per_Month 1.115402e-07 1.280556e-07 1.469845e-07
Monthly_Remittances 1.394304e-07 1.601147e-07 1.838340e-07
Wage_Last_Head 3.598910e-08 4.131949e-08 4.742932e-08
Duration_of_stay -4.718502e-07 -5.422800e-07 -6.231822e-07
Education_Level_3 -2.897982e-04 -3.327068e-04 -3.818857e-04
Education_Level_5 1.320164e-05 1.483072e-05 1.659369e-05
Education_Level_6 -7.559901e-04 -8.685180e-04 -9.976784e-04
Education_Level_7 -3.218815e-04 -3.701499e-04 -4.256664e-04
Education_Level_8 -3.215663e-04 -3.700946e-04 -4.260066e-04
Livelihood_Occupation_7 6.063972e-04 6.967100e-04 8.003872e-04
Livelihood_Occupation_9 7.321754e-04 8.410382e-04 9.659526e-04
Livelihood_Occupation_17 -3.374800e-03 -3.876816e-03 -4.452932e-03
(Intercept) 8.376521e-01 8.375181e-01 8.373650e-01
(Intercept) . . .
Number_Trips 1.697904e-04 1.947667e-04 2.233412e-04
Rent_per_Month 1.686694e-07 1.934984e-07 2.219101e-07
Monthly_Remittances 2.110231e-07 2.421757e-07 2.778512e-07
Wage_Last_Head 5.442941e-08 6.244532e-08 7.161902e-08
Duration_of_stay -7.161008e-07 -8.228034e-07 -9.453120e-07
Education_Level_3 -4.382250e-04 -5.027330e-04 -5.765493e-04
Education_Level_5 1.847582e-05 2.044968e-05 2.246978e-05
Education_Level_6 -1.145893e-03 -1.315923e-03 -1.510916e-03
Education_Level_7 -4.895225e-04 -5.629745e-04 -6.474690e-04
Education_Level_8 -4.904442e-04 -5.647308e-04 -6.504018e-04
Livelihood_Occupation_7 9.193786e-04 1.055909e-03 1.212516e-03
Livelihood_Occupation_9 1.109243e-03 1.273555e-03 1.461899e-03
Livelihood_Occupation_17 -5.113903e-03 -5.871986e-03 -6.741130e-03
(Intercept) 8.371903e-01 8.369911e-01 8.367642e-01
(Intercept) . . .
Number_Trips 2.560083e-04 2.933232e-04 3.359064e-04
Rent_per_Month 2.543988e-07 2.915202e-07 3.338963e-07
Monthly_Remittances 3.186824e-07 3.653831e-07 4.187560e-07
Wage_Last_Head 8.211062e-08 9.410019e-08 1.077895e-07
Duration_of_stay -1.085938e-06 -1.247320e-06 -1.432469e-06
Education_Level_3 -6.609585e-04 -7.574046e-04 -8.675050e-04
Education_Level_5 2.446580e-05 2.633352e-05 2.792301e-05
Education_Level_6 -1.734453e-03 -1.990601e-03 -2.283975e-03
Education_Level_7 -7.446717e-04 -8.565008e-04 -9.851654e-04
Education_Level_8 -7.492403e-04 -8.633189e-04 -9.950490e-04
Livelihood_Occupation_7 1.392087e-03 1.597903e-03 1.833689e-03
Livelihood_Occupation_9 1.677691e-03 1.924803e-03 2.207612e-03
Livelihood_Occupation_17 -7.737191e-03 -8.878155e-03 -1.018439e-02
(Intercept) 8.365063e-01 8.362135e-01 8.358817e-01
(Intercept) . . .
Number_Trips 3.844490e-04 4.397164e-04 5.025515e-04
Rent_per_Month 3.822210e-07 4.372649e-07 4.998784e-07
Monthly_Remittances 4.797010e-07 5.492227e-07 6.284378e-07
Wage_Last_Head 1.234038e-07 1.411934e-07 1.614351e-07
Duration_of_stay -1.644815e-06 -1.888260e-06 -2.167237e-06
Education_Level_3 -9.930641e-04 -1.136086e-03 -1.298786e-03
Education_Level_5 2.902318e-05 2.934181e-05 2.847990e-05
Education_Level_6 -2.619795e-03 -3.003957e-03 -3.443098e-03
Education_Level_7 -1.133209e-03 -1.303562e-03 -1.499597e-03
Education_Level_8 -1.147239e-03 -1.323161e-03 -1.526633e-03
Livelihood_Occupation_7 2.103659e-03 2.412573e-03 2.765789e-03
Livelihood_Occupation_9 2.531052e-03 2.900668e-03 3.322673e-03
Livelihood_Occupation_17 -1.167890e-02 -1.338759e-02 -1.533958e-02
(Intercept) 8.355065e-01 8.350834e-01 8.346074e-01
(Intercept) . . .
Number_Trips 5.738760e-04 6.546891e-04 7.460643e-04
Rent_per_Month 5.709950e-07 6.516310e-07 7.428841e-07
Monthly_Remittances 7.185805e-07 8.210069e-07 9.371954e-07
Wage_Last_Head 1.844329e-07 2.105184e-07 2.400507e-07
Duration_of_stay -2.486767e-06 -2.852532e-06 -3.270935e-06
Education_Level_3 -1.483594e-03 -1.693163e-03 -1.930360e-03
Education_Level_5 2.589893e-05 2.087948e-05 1.246938e-05
Education_Level_6 -3.944672e-03 -4.517014e-03 -5.169411e-03
Education_Level_7 -1.725196e-03 -1.984826e-03 -2.283622e-03
Education_Level_8 -1.762109e-03 -2.034791e-03 -2.350748e-03
Livelihood_Occupation_7 3.169318e-03 3.629872e-03 4.154914e-03
Livelihood_Occupation_9 3.803994e-03 4.352318e-03 4.976126e-03
Livelihood_Occupation_17 -1.756743e-02 -2.010744e-02 -2.299987e-02
(Intercept) 8.340737e-01 8.334775e-01 8.328141e-01
(Intercept) . . .
Number_Trips 8.491404e-04 9.651092e-04 1.095196e-03
Rent_per_Month 8.459274e-07 9.619999e-07 1.092391e-06
Monthly_Remittances 1.068745e-06 1.217364e-06 1.384861e-06
Wage_Last_Head 2.734140e-07 3.110158e-07 3.532815e-07
Duration_of_stay -3.749177e-06 -4.295317e-06 -4.918340e-06
Education_Level_3 -2.198259e-03 -2.500116e-03 -2.839338e-03
Education_Level_5 -5.805894e-07 -1.989550e-05 -4.757148e-05
Education_Level_6 -5.912160e-03 -6.756619e-03 -7.715236e-03
Education_Level_7 -2.627483e-03 -3.023177e-03 -3.478451e-03
Education_Level_8 -2.717063e-03 -3.141988e-03 -3.635126e-03
Livelihood_Occupation_7 4.752689e-03 5.432238e-03 6.203392e-03
Livelihood_Occupation_9 5.684710e-03 6.488166e-03 7.397354e-03
Livelihood_Occupation_17 -2.628910e-02 -3.002378e-02 -3.425681e-02
(Intercept) 8.320794e-01 8.312701e-01 8.303839e-01
(Intercept) . . .
Number_Trips 1.240633e-03 1.402626e-03 1.582307e-03
Rent_per_Month 1.238421e-06 1.401411e-06 1.582649e-06
Monthly_Remittances 1.573116e-06 1.784050e-06 2.019579e-06
Wage_Last_Head 4.006485e-07 4.535576e-07 5.124421e-07
Duration_of_stay -5.628205e-06 -6.435882e-06 -7.353364e-06
Education_Level_3 -3.219437e-03 -3.643968e-03 -4.116456e-03
Education_Level_5 -8.628902e-05 -1.394453e-04 -2.113051e-04
Education_Level_6 -8.801558e-03 -1.003021e-02 -1.141681e-02
Education_Level_7 -4.002158e-03 -4.604380e-03 -5.296558e-03
Education_Level_8 -4.207619e-03 -4.872348e-03 -5.644136e-03
Livelihood_Occupation_7 7.076724e-03 8.063454e-03 9.175292e-03
Livelihood_Occupation_9 8.423820e-03 9.579651e-03 1.087728e-02
Livelihood_Occupation_17 -3.904521e-02 -4.444976e-02 -5.053450e-02
(Intercept) 8.294200e-01 8.283799e-01 8.272672e-01
(Intercept) . . .
Number_Trips 1.780688e-03 1.998595e-03 2.236604e-03
Rent_per_Month 1.783347e-06 2.004596e-06 2.247304e-06
Monthly_Remittances 2.281557e-06 2.571703e-06 2.891515e-06
Wage_Last_Head 5.777151e-07 6.497546e-07 7.288867e-07
Duration_of_stay -8.393637e-06 -9.570611e-06 -1.089899e-05
Education_Level_3 -4.640307e-03 -5.218709e-03 -5.854533e-03
Education_Level_5 -3.071692e-04 -4.335539e-04 -5.983771e-04
Education_Level_6 -1.297790e-02 -1.473070e-02 -1.669293e-02
Education_Level_7 -6.091607e-03 -7.004027e-03 -8.049968e-03
Education_Level_8 -6.539936e-03 -7.578984e-03 -8.782908e-03
Livelihood_Occupation_7 1.042420e-02 1.182204e-02 1.338014e-02
Livelihood_Occupation_9 1.232916e-02 1.394744e-02 1.574340e-02
Livelihood_Occupation_17 -5.736584e-02 -6.501138e-02 -7.353840e-02
(Intercept) 8.260886e-01 8.248543e-01 8.235783e-01
(Intercept) . . .
Number_Trips 2.494967e-03 2.773544e-03 3.071736e-03
Rent_per_Month 2.512148e-06 2.799511e-06 3.109435e-06
Monthly_Remittances 3.242172e-06 3.624424e-06 4.038477e-06
Wage_Last_Head 8.153700e-07 9.093783e-07 1.010988e-06
Duration_of_stay -1.239404e-05 -1.407132e-05 -1.594626e-05
Education_Level_3 -6.550234e-03 -7.307763e-03 -8.128506e-03
Education_Level_5 -8.111367e-04 -1.083067e-03 -1.427256e-03
Education_Level_6 -1.888243e-02 -2.131679e-02 -2.401283e-02
Education_Level_7 -9.247255e-03 -1.061535e-02 -1.217521e-02
Education_Level_8 -1.017574e-02 -1.178381e-02 -1.363551e-02
Livelihood_Occupation_7 1.510869e-02 1.701605e-02 1.910787e-02
Livelihood_Occupation_9 1.772686e-02 1.990546e-02 2.228385e-02
Livelihood_Occupation_17 -8.301185e-02 -9.349197e-02 -1.050315e-01
(Intercept) 8.222783e-01 8.209765e-01 8.196987e-01
(Intercept) . . .
Number_Trips 3.388496e-03 3.722086e-03 4.070348e-03
Rent_per_Month 3.441586e-06 3.795176e-06 4.169010e-06
Monthly_Remittances 4.483872e-06 4.959404e-06 5.463007e-06
Wage_Last_Head 1.120185e-06 1.236801e-06 1.360599e-06
Duration_of_stay -1.803363e-05 -2.034691e-05 -2.289758e-05
Education_Level_3 -9.013575e-03 -9.962711e-03 -1.097583e-02
Education_Level_5 -1.859023e-03 -2.394735e-03 -3.053170e-03
Education_Level_6 -2.698629e-02 -3.025033e-02 -3.381597e-02
Education_Level_7 -1.394931e-02 -1.596030e-02 -1.823170e-02
Education_Level_8 -1.576093e-02 -1.819083e-02 -2.095624e-02
Livelihood_Occupation_7 2.138612e-02 2.384813e-02 2.648553e-02
Livelihood_Occupation_9 2.486278e-02 2.763817e-02 3.060031e-02
Livelihood_Occupation_17 -1.176725e-01 -1.314432e-01 -1.463542e-01
(Intercept) 8.184738e-01 8.173334e-01 8.163102e-01
(Intercept) . . .
Number_Trips 4.430562e-03 4.799548e-03 5.173756e-03
Rent_per_Month 4.561451e-06 4.970439e-06 5.393523e-06
Monthly_Remittances 5.991716e-06 6.541652e-06 7.108055e-06
Wage_Last_Head 1.491230e-06 1.628272e-06 1.771247e-06
Duration_of_stay -2.569428e-05 -2.874195e-05 -3.204101e-05
Education_Level_3 -1.205221e-02 -1.319086e-02 -1.439058e-02
Education_Level_5 -3.854295e-03 -4.819005e-03 -5.968482e-03
Education_Level_6 -3.769072e-02 -4.187804e-02 -4.637667e-02
Education_Level_7 -2.078642e-02 -2.364604e-02 -2.682969e-02
Education_Level_8 -2.408702e-02 -2.761060e-02 -3.155052e-02
Livelihood_Occupation_7 2.928336e-02 3.221945e-02 3.526409e-02
Livelihood_Occupation_9 3.373316e-02 3.701388e-02 4.041273e-02
Livelihood_Occupation_17 -1.623953e-01 -1.795328e-01 -1.977071e-01
(Intercept) 8.154369e-01 8.147444e-01 8.142602e-01
(Intercept) . . .
Number_Trips 5.549395e-03 5.922573e-03 6.289457e-03
Rent_per_Month 5.827899e-06 6.270454e-06 6.717814e-06
Monthly_Remittances 7.685380e-06 8.267449e-06 8.847650e-06
Wage_Last_Head 1.919655e-06 2.072999e-06 2.230812e-06
Duration_of_stay -3.558661e-05 -3.936805e-05 -4.336845e-05
Education_Level_3 -1.565014e-02 -1.696830e-02 -1.834379e-02
Education_Level_5 -7.323377e-03 -8.902840e-03 -1.072344e-02
Education_Level_6 -5.118012e-02 -5.627630e-02 -6.164724e-02
Education_Level_7 -3.035295e-02 -3.422656e-02 -3.845524e-02
Education_Level_8 -3.592483e-02 -4.074461e-02 -4.601253e-02
Livelihood_Occupation_7 3.838026e-02 4.152443e-02 4.464801e-02
Livelihood_Occupation_9 4.389352e-02 4.741440e-02 5.092934e-02
Livelihood_Occupation_17 -2.168314e-01 -2.367921e-01 -2.574497e-01
(Intercept) 8.140068e-01 8.140005e-01 8.142502e-01
(Intercept) . . .
Number_Trips 6.646422e-03 6.990189e-03 7.317933e-03
Rent_per_Month 7.166391e-06 7.612437e-06 8.052100e-06
Monthly_Remittances 9.419185e-06 9.975340e-06 1.050976e-05
Wage_Last_Head 2.392673e-06 2.558210e-06 2.727092e-06
Duration_of_stay -4.756470e-05 -5.192784e-05 -5.642373e-05
Education_Level_3 -1.977519e-02 -2.126074e-02 -2.279805e-02
Education_Level_5 -1.279801e-02 -1.513451e-02 -1.773498e-02
Education_Level_6 -6.726918e-02 -7.311271e-02 -7.914327e-02
Education_Level_7 -4.303655e-02 -4.796006e-02 -5.320673e-02
Education_Level_8 -5.172179e-02 -5.785524e-02 -6.438506e-02
Livelihood_Occupation_7 4.769939e-02 5.062646e-02 5.337941e-02
Livelihood_Occupation_9 5.438999e-02 5.774787e-02 6.095682e-02
Livelihood_Occupation_17 -2.786426e-01 -3.001908e-01 -3.219018e-01
(Intercept) 8.147571e-01 8.155142e-01 8.165134e-01
(Intercept) . . .
Number_Trips 7.627359e-03 7.916738e-03 8.184604e-03
Rent_per_Month 8.481500e-06 8.896816e-06 9.291077e-06
Monthly_Remittances 1.101673e-05 1.149136e-05 1.192815e-05
Wage_Last_Head 2.899007e-06 3.073633e-06 3.251172e-06
Duration_of_stay -6.101408e-05 -6.565776e-05 -7.031273e-05
Education_Level_3 -2.438376e-02 -2.601324e-02 -2.768723e-02
Education_Level_5 -2.059472e-02 -2.370169e-02 -2.704191e-02
Education_Level_6 -8.532176e-02 -9.160539e-02 -9.795535e-02
Education_Level_7 -5.874872e-02 -6.454964e-02 -7.057008e-02
Education_Level_8 -7.127287e-02 -7.847031e-02 -8.592235e-02
Livelihood_Occupation_7 5.591353e-02 5.819174e-02 6.018674e-02
Livelihood_Occupation_9 6.397531e-02 6.676847e-02 6.931066e-02
Livelihood_Occupation_17 -3.435776e-01 -3.650212e-01 -3.860440e-01
(Intercept) 8.177230e-01 8.191208e-01 8.206759e-01
(Intercept) . . .
Number_Trips 8.430714e-03 8.654767e-03 8.857046e-03
Rent_per_Month 9.666344e-06 1.001732e-05 1.034164e-05
Monthly_Remittances 1.232707e-05 1.268541e-05 1.300274e-05
Wage_Last_Head 3.430238e-06 3.610656e-06 3.791710e-06
Duration_of_stay -7.493601e-05 -7.948719e-05 -8.392849e-05
Education_Level_3 -2.938629e-02 -3.110601e-02 -3.283533e-02
Education_Level_5 -3.057946e-02 -3.428509e-02 -3.812014e-02
Education_Level_6 -1.043137e-01 -1.106375e-01 -1.168800e-01
Education_Level_7 -7.675141e-02 -8.304108e-02 -8.937992e-02
Education_Level_8 -9.356118e-02 -1.013185e-01 -1.091214e-01
Livelihood_Occupation_7 6.188149e-02 6.327064e-02 6.435941e-02
Livelihood_Occupation_9 7.158294e-02 7.357805e-02 7.529710e-02
Livelihood_Occupation_17 -4.064710e-01 -4.261479e-01 -4.449437e-01
(Intercept) 8.223548e-01 8.241230e-01 8.259465e-01
(Intercept) . . .
Number_Trips 9.038195e-03 9.199151e-03 9.341077e-03
Rent_per_Month 1.063757e-05 1.090413e-05 1.114109e-05
Monthly_Remittances 1.327971e-05 1.351792e-05 1.371968e-05
Wage_Last_Head 3.972539e-06 4.152149e-06 4.329450e-06
Duration_of_stay -8.822615e-05 -9.235121e-05 -9.627995e-05
Education_Level_3 -3.456198e-02 -3.627289e-02 -3.795476e-02
Education_Level_5 -4.204200e-02 -4.600590e-02 -4.996673e-02
Education_Level_6 -1.229962e-01 -1.289439e-01 -1.346852e-01
Education_Level_7 -9.570756e-02 -1.019646e-01 -1.080945e-01
Education_Level_8 -1.168965e-01 -1.245720e-01 -1.320801e-01
Livelihood_Occupation_7 6.516252e-02 6.570248e-02 6.600751e-02
Livelihood_Occupation_9 7.674924e-02 7.795034e-02 7.892124e-02
Livelihood_Occupation_17 -4.627539e-01 -4.795012e-01 -4.951352e-01
(Intercept) 8.277929e-01 8.296329e-01 8.314403e-01
(Intercept) . . .
Number_Trips 9.465300e-03 9.573253e-03 9.666428e-03
Rent_per_Month 1.134892e-05 1.152876e-05 1.168228e-05
Monthly_Remittances 1.388788e-05 1.402580e-05 1.413691e-05
Wage_Last_Head 4.503292e-06 4.672526e-06 4.836048e-06
Duration_of_stay -9.999412e-05 -1.034809e-04 -1.067325e-04
Education_Level_3 -3.959463e-02 -4.118041e-02 -4.270138e-02
Education_Level_5 -5.388090e-02 -5.770794e-02 -6.141184e-02
Education_Level_6 -1.401869e-01 -1.454212e-01 -1.503666e-01
Education_Level_7 -1.140456e-01 -1.197726e-01 -1.252375e-01
Education_Level_8 -1.393594e-01 -1.463560e-01 -1.530252e-01
Livelihood_Occupation_7 6.610936e-02 6.604130e-02 6.583633e-02
Livelihood_Occupation_9 7.968613e-02 8.027089e-02 8.070166e-02
Livelihood_Occupation_17 -5.096308e-01 -5.229859e-01 -5.352177e-01
(Intercept) 8.331932e-01 8.348868e-01 8.364680e-01
(Intercept) . . .
Number_Trips 9.746331e-03 9.812849e-03 9.872174e-03
Rent_per_Month 1.181156e-05 1.191210e-05 1.200698e-05
Monthly_Remittances 1.422470e-05 1.428933e-05 1.434373e-05
Wage_Last_Head 4.992856e-06 5.143496e-06 5.283061e-06
Duration_of_stay -1.097460e-04 -1.125256e-04 -1.150668e-04
Education_Level_3 -4.414851e-02 -4.552413e-02 -4.679496e-02
Education_Level_5 -6.496208e-02 -6.834279e-02 -7.150966e-02
Education_Level_6 -1.550075e-01 -1.593480e-01 -1.633436e-01
Education_Level_7 -1.304101e-01 -1.352785e-01 -1.397992e-01
Education_Level_8 -1.593320e-01 -1.652576e-01 -1.707667e-01
Livelihood_Occupation_7 6.552569e-02 6.513840e-02 6.469806e-02
Livelihood_Occupation_9 8.100372e-02 8.120278e-02 8.131352e-02
Livelihood_Occupation_17 -5.463602e-01 -5.564607e-01 -5.655729e-01
(Intercept) 8.379664e-01 8.393629e-01 8.406545e-01
(Intercept) . . .
Number_Trips 9.920876e-03 9.961779e-03 9.996009e-03
Rent_per_Month 1.207815e-05 1.213488e-05 1.217946e-05
Monthly_Remittances 1.438112e-05 1.440736e-05 1.442472e-05
Wage_Last_Head 5.415258e-06 5.538377e-06 5.652287e-06
Duration_of_stay -1.173864e-04 -1.194912e-04 -1.213928e-04
Education_Level_3 -4.798618e-02 -4.908727e-02 -5.009876e-02
Education_Level_5 -7.447644e-02 -7.722786e-02 -7.976233e-02
Education_Level_6 -1.670366e-01 -1.704197e-01 -1.735029e-01
Education_Level_7 -1.439954e-01 -1.478574e-01 -1.513911e-01
Education_Level_8 -1.758721e-01 -1.805685e-01 -1.848638e-01
Livelihood_Occupation_7 6.422752e-02 6.374388e-02 6.326118e-02
Livelihood_Occupation_9 8.136114e-02 8.135938e-02 8.132157e-02
Livelihood_Occupation_17 -5.737619e-01 -5.810932e-01 -5.876346e-01
(Intercept) 8.418407e-01 8.429234e-01 8.439063e-01
(Intercept) . . .
Number_Trips 1.002457e-02 1.004835e-02 1.006811e-02
Rent_per_Month 1.221396e-05 1.224023e-05 1.225987e-05
Monthly_Remittances 1.443518e-05 1.444037e-05 1.444164e-05
Wage_Last_Head 5.757027e-06 5.852786e-06 5.939869e-06
Duration_of_stay -1.231040e-04 -1.246383e-04 -1.260094e-04
Education_Level_3 -5.102265e-02 -5.186210e-02 -5.262118e-02
Education_Level_5 -8.208250e-02 -8.419453e-02 -8.610728e-02
Education_Level_6 -1.762993e-01 -1.788244e-01 -1.810954e-01
Education_Level_7 -1.546072e-01 -1.575197e-01 -1.601456e-01
Education_Level_8 -1.887713e-01 -1.923089e-01 -1.954974e-01
Livelihood_Occupation_7 6.279014e-02 6.233865e-02 6.191214e-02
Livelihood_Occupation_9 8.125859e-02 8.117912e-02 8.108996e-02
Livelihood_Occupation_17 -5.934539e-01 -5.986167e-01 -6.031862e-01
(Intercept) 8.447940e-01 8.455922e-01 8.463070e-01
(Intercept) . . .
Number_Trips 1.008453e-02 1.009816e-02 1.010948e-02
Rent_per_Month 1.227424e-05 1.228447e-05 1.229152e-05
Monthly_Remittances 1.444008e-05 1.443656e-05 1.443175e-05
Wage_Last_Head 6.018678e-06 6.089686e-06 6.153407e-06
Duration_of_stay -1.272309e-04 -1.283162e-04 -1.292782e-04
Education_Level_3 -5.330458e-02 -5.391744e-02 -5.446508e-02
Education_Level_5 -8.783154e-02 -8.937944e-02 -9.076386e-02
Education_Level_6 -1.831303e-01 -1.849474e-01 -1.865653e-01
Education_Level_7 -1.625034e-01 -1.646129e-01 -1.664939e-01
Education_Level_8 -1.983597e-01 -2.009199e-01 -2.032025e-01
Livelihood_Occupation_7 6.151403e-02 6.114613e-02 6.080902e-02
Livelihood_Occupation_9 8.099628e-02 8.090190e-02 8.080960e-02
Livelihood_Occupation_17 -6.072221e-01 -6.107799e-01 -6.139111e-01
(Intercept) 8.469450e-01 8.474946e-01 8.479995e-01
(Intercept) . . .
Number_Trips 1.011889e-02 1.012917e-02 1.013551e-02
Rent_per_Month 1.229614e-05 1.230770e-05 1.230853e-05
Monthly_Remittances 1.442618e-05 1.442437e-05 1.441802e-05
Wage_Last_Head 6.210384e-06 6.258729e-06 6.304041e-06
Duration_of_stay -1.301289e-04 -1.308727e-04 -1.315348e-04
Education_Level_3 -5.495289e-02 -5.536777e-02 -5.575329e-02
Education_Level_5 -9.199794e-02 -9.307368e-02 -9.404772e-02
Education_Level_6 -1.880019e-01 -1.892479e-01 -1.903746e-01
Education_Level_7 -1.681663e-01 -1.696268e-01 -1.709406e-01
Education_Level_8 -2.052315e-01 -2.070117e-01 -2.086046e-01
Livelihood_Occupation_7 6.050232e-02 6.022514e-02 5.997570e-02
Livelihood_Occupation_9 8.072126e-02 8.063543e-02 8.055836e-02
Livelihood_Occupation_17 -6.166628e-01 -6.190791e-01 -6.211961e-01
(Intercept) 8.484469e-01 8.488420e-01 8.491678e-01
(Intercept) . . .
Number_Trips 1.014074e-02 1.014510e-02 1.015126e-02
Rent_per_Month 1.230818e-05 1.230722e-05 1.231576e-05
Monthly_Remittances 1.441167e-05 1.440558e-05 1.440486e-05
Wage_Last_Head 6.344268e-06 6.379853e-06 6.407789e-06
Duration_of_stay -1.321174e-04 -1.326293e-04 -1.330681e-04
Education_Level_3 -5.609438e-02 -5.639538e-02 -5.663052e-02
Education_Level_5 -9.490950e-02 -9.567017e-02 -9.630061e-02
Education_Level_6 -1.913691e-01 -1.922450e-01 -1.929730e-01
Education_Level_7 -1.721010e-01 -1.731237e-01 -1.739829e-01
Education_Level_8 -2.100110e-01 -2.112505e-01 -2.123048e-01
Livelihood_Occupation_7 5.975233e-02 5.955309e-02 5.937887e-02
Livelihood_Occupation_9 8.048758e-02 8.042305e-02 8.036280e-02
Livelihood_Occupation_17 -6.230501e-01 -6.246724e-01 -6.260946e-01
(Intercept) 8.494747e-01 8.497467e-01 8.499855e-01
(Intercept) . . .
Number_Trips 1.015432e-02 1.015660e-02 1.015850e-02
Rent_per_Month 1.231424e-05 1.231146e-05 1.230870e-05
Monthly_Remittances 1.439953e-05 1.439409e-05 1.438912e-05
Wage_Last_Head 6.435597e-06 6.460320e-06 6.482061e-06
Duration_of_stay -1.334621e-04 -1.338079e-04 -1.341105e-04
Education_Level_3 -5.686580e-02 -5.707381e-02 -5.725639e-02
Education_Level_5 -9.689314e-02 -9.741516e-02 -9.787327e-02
Education_Level_6 -1.936517e-01 -1.942494e-01 -1.947734e-01
Education_Level_7 -1.747762e-01 -1.754743e-01 -1.760863e-01
Education_Level_8 -2.132653e-01 -2.141097e-01 -2.148499e-01
Livelihood_Occupation_7 5.922150e-02 5.908238e-02 5.895964e-02
Livelihood_Occupation_9 8.031008e-02 8.026322e-02 8.022144e-02
Livelihood_Occupation_17 -6.273334e-01 -6.284153e-01 -6.293595e-01
(Intercept) 8.501948e-01 8.503782e-01 8.505387e-01
(Intercept) . . .
Number_Trips 1.016008e-02 1.016141e-02 1.016253e-02
Rent_per_Month 1.230609e-05 1.230366e-05 1.230141e-05
Monthly_Remittances 1.438462e-05 1.438058e-05 1.437697e-05
Wage_Last_Head 6.501142e-06 6.517870e-06 6.532520e-06
Duration_of_stay -1.343751e-04 -1.346063e-04 -1.348082e-04
Education_Level_3 -5.741641e-02 -5.755654e-02 -5.767913e-02
Education_Level_5 -9.827481e-02 -9.862640e-02 -9.893400e-02
Education_Level_6 -1.952321e-01 -1.956335e-01 -1.959843e-01
Education_Level_7 -1.766224e-01 -1.770914e-01 -1.775015e-01
Education_Level_8 -2.154981e-01 -2.160652e-01 -2.165611e-01
Livelihood_Occupation_7 5.885153e-02 5.875648e-02 5.867303e-02
Livelihood_Occupation_9 8.018430e-02 8.015140e-02 8.012232e-02
Livelihood_Occupation_17 -6.301833e-01 -6.309017e-01 -6.315279e-01
(Intercept) 8.506791e-01 8.508017e-01 8.508936e-01
(Intercept) . . .
Number_Trips 1.016348e-02 1.016428e-02 1.016655e-02
Rent_per_Month 1.229936e-05 1.229751e-05 1.230068e-05
Monthly_Remittances 1.437375e-05 1.437090e-05 1.437112e-05
Wage_Last_Head 6.545338e-06 6.556546e-06 6.564103e-06
Duration_of_stay -1.349844e-04 -1.351382e-04 -1.352652e-04
Education_Level_3 -5.778630e-02 -5.787994e-02 -5.794314e-02
Education_Level_5 -9.920291e-02 -9.943784e-02 -9.961023e-02
Education_Level_6 -1.962908e-01 -1.965584e-01 -1.967618e-01
Education_Level_7 -1.778598e-01 -1.781728e-01 -1.784133e-01
Education_Level_8 -2.169943e-01 -2.173726e-01 -2.176706e-01
Livelihood_Occupation_7 5.859985e-02 5.853575e-02 5.848729e-02
Livelihood_Occupation_9 8.009667e-02 8.007410e-02 8.005633e-02
Livelihood_Occupation_17 -6.320737e-01 -6.325492e-01 -6.329651e-01
# Call lambda values and corresponding test MSEs
lambda_values <- cv.out$lambda
test_mses <- cv.out$cvm
# Plot the test MSE as a function of the log of the regularization
# parameter (i.e. log($\lambda$)) for several orders of magnitude.
plot(log(lambda_values), test_mses, type = "b",
xlab = "log(lambda)", ylab = "Test MSE",
main = "Test MSE vs. log(lambda) of Ridge Regression")Lasso
# Train Lasso regression model
lasso.mod <- glmnet(x[train, ], y[train], alpha = 1, lambda.min.ratio = 0.000001)
# Cross-validation for selecting lambda
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 1, lambda.min.ratio = 0.000001)
# Find lambda that minimizes training MSE
bestlam <- cv.out$lambda.min
# Predict the test data with the best lambda
lasso.pred <- predict(lasso.mod, s = bestlam, newx = x[test, ])
# Assuming predictions are probabilities, convert them to binary classes
predicted_classes <- ifelse(lasso.pred > 0.5, 1, 0)
# Confusion matrix
conf_matrix <- table(predicted_classes, y[test])
# Calculate Sensitivity (True Positive Rate)
sensitivity <- conf_matrix[2, 2] / sum(conf_matrix[2, ])
# Calculate Specificity (True Negative Rate)
specificity <- conf_matrix[1, 1] / sum(conf_matrix[1, ])
# Calculate Accuracy
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
# Calculate Precision (Positive Predictive Value)
precision <- conf_matrix[2, 2] / sum(conf_matrix[, 2])
# Calculate Recall (Same as Sensitivity)
recall <- sensitivity
# Calculate False Positive Rate
fpr <- 1 - specificity
F1 <- 2 * (precision * recall) / (precision + recall)
# Create ROC Curve
roc_curve_lasso <- roc(y[test], lasso.pred)
# Calculate AUC (Area Under Curve)
auc_value_lasso <- auc(roc_curve_lasso)
roc_curve <- roc(y[test], lasso.pred)
# Calculate AUC (Area Under Curve)
auc_value <- auc(roc_curve)
# Print the metrics
print("Metrics:")
print(paste("Sensitivity (True Positive Rate):", sensitivity))
print(paste("Specificity (True Negative Rate):", specificity))
print(paste("Accuracy:", accuracy))
print(paste("Precision (Positive Predictive Value):", precision))
print(paste("Recall:", recall))
print(paste("False Positive Rate:", fpr))
print(paste("F1:", F1))
print(paste("AUC (Area Under Curve):", auc_value_lasso))
# Plot ROC curve
plot(roc_curve_lasso, main = "ROC Curve", col = "blue")
# Save AUC
auc_lasso <- round(auc(roc_curve_lasso), 2)
print(paste("AUC (Area Under Curve):", auc_value))
# Plot ROC curve
plot(roc_curve, main = "ROC Curve", col = "blue")
coef(lasso.mod)Setting levels: control = 0, case = 1
Warning message in roc.default(y[test], lasso.pred):
"Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector."
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Warning message in roc.default(y[test], lasso.pred):
"Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector."
Setting direction: controls < cases
[[ suppressing 48 column names 's0', 's1', 's2' ... ]]
[1] "Metrics:"
[1] "Sensitivity (True Positive Rate): 0.917808219178082"
[1] "Specificity (True Negative Rate): 0.674418604651163"
[1] "Accuracy: 0.892156862745098"
[1] "Precision (Positive Predictive Value): 0.959885386819484"
[1] "Recall: 0.917808219178082"
[1] "False Positive Rate: 0.325581395348837"
[1] "F1: 0.938375350140056"
[1] "AUC (Area Under Curve): 0.885095430042251"
[1] "AUC (Area Under Curve): 0.885095430042251"
15 x 48 sparse Matrix of class "dgCMatrix"
(Intercept) 0.8385678 0.84459594 0.8498389 0.8543989 0.8583650
(Intercept) . . . . .
Number_Trips . . . . .
Rent_per_Month . . . . .
Monthly_Remittances . . . . .
Wage_Last_Head . . . . .
Duration_of_stay . . . . .
Education_Level_3 . . . . .
Education_Level_5 . . . . .
Education_Level_6 . . . . .
Education_Level_7 . . . . .
Education_Level_8 . . . . .
Livelihood_Occupation_7 . . . . .
Livelihood_Occupation_9 . . . . .
Livelihood_Occupation_17 . -0.08968912 -0.1676961 -0.2355427 -0.2945521
(Intercept) 0.8618145 8.621181e-01 8.581691e-01 8.547354e-01
(Intercept) . . . .
Number_Trips . . . .
Rent_per_Month . . . .
Monthly_Remittances . 1.074111e-06 3.686501e-06 5.958208e-06
Wage_Last_Head . . . .
Duration_of_stay . . . .
Education_Level_3 . . . .
Education_Level_5 . . . .
Education_Level_6 . . . .
Education_Level_7 . . . .
Education_Level_8 . . . .
Livelihood_Occupation_7 . . . .
Livelihood_Occupation_9 . . . .
Livelihood_Occupation_17 -0.3458755 -3.884599e-01 -4.222882e-01 -4.517112e-01
(Intercept) 8.517490e-01 8.489275e-01 8.461071e-01
(Intercept) . . .
Number_Trips . . .
Rent_per_Month . . .
Monthly_Remittances 7.934023e-06 9.607007e-06 1.057038e-05
Wage_Last_Head . 4.548321e-08 6.156493e-07
Duration_of_stay . . .
Education_Level_3 . . .
Education_Level_5 . . .
Education_Level_6 . . .
Education_Level_7 . . .
Education_Level_8 . . -1.184768e-02
Livelihood_Occupation_7 . . .
Livelihood_Occupation_9 . . .
Livelihood_Occupation_17 -4.773018e-01 -4.993742e-01 -5.181312e-01
(Intercept) 8.467157e-01 8.481282e-01 8.496150e-01
(Intercept) . . .
Number_Trips . . .
Rent_per_Month . . 1.170596e-06
Monthly_Remittances 1.121465e-05 1.175257e-05 1.223143e-05
Wage_Last_Head 1.347024e-06 2.009614e-06 2.558577e-06
Duration_of_stay -1.267819e-05 -2.934298e-05 -4.437984e-05
Education_Level_3 . . .
Education_Level_5 . . .
Education_Level_6 . . .
Education_Level_7 . . -1.421143e-02
Education_Level_8 -3.270034e-02 -5.098026e-02 -6.899918e-02
Livelihood_Occupation_7 . . .
Livelihood_Occupation_9 . . 4.382115e-04
Livelihood_Occupation_17 -5.356182e-01 -5.510074e-01 -5.648320e-01
(Intercept) 8.495092e-01 8.477561e-01 8.461447e-01
(Intercept) . . .
Number_Trips 4.901507e-04 1.508021e-03 2.416258e-03
Rent_per_Month 2.639183e-06 3.838212e-06 4.920779e-06
Monthly_Remittances 1.255319e-05 1.277602e-05 1.298753e-05
Wage_Last_Head 3.014969e-06 3.422343e-06 3.766555e-06
Duration_of_stay -5.730624e-05 -6.745751e-05 -7.624610e-05
Education_Level_3 . . .
Education_Level_5 . . .
Education_Level_6 -2.572668e-03 -2.507511e-02 -4.457094e-02
Education_Level_7 -3.245827e-02 -4.833310e-02 -6.208087e-02
Education_Level_8 -8.497298e-02 -9.879766e-02 -1.107763e-01
Livelihood_Occupation_7 . 8.648452e-03 1.620796e-02
Livelihood_Occupation_9 1.001095e-02 1.952200e-02 2.778820e-02
Livelihood_Occupation_17 -5.762271e-01 -5.849279e-01 -5.924845e-01
(Intercept) 8.447970e-01 8.444533e-01 8.443245e-01
(Intercept) . . .
Number_Trips 3.191901e-03 4.094501e-03 4.925224e-03
Rent_per_Month 5.834782e-06 6.644101e-06 7.350972e-06
Monthly_Remittances 1.315921e-05 1.329222e-05 1.340360e-05
Wage_Last_Head 4.072411e-06 4.392479e-06 4.683174e-06
Duration_of_stay -8.391429e-05 -9.070730e-05 -9.664481e-05
Education_Level_3 . . -1.456080e-05
Education_Level_5 . -9.986517e-03 -2.067954e-02
Education_Level_6 -6.157157e-02 -7.786357e-02 -9.236055e-02
Education_Level_7 -7.406711e-02 -8.636265e-02 -9.746447e-02
Education_Level_8 -1.212150e-01 -1.324795e-01 -1.427445e-01
Livelihood_Occupation_7 2.278798e-02 2.740148e-02 3.118295e-02
Livelihood_Occupation_9 3.498759e-02 4.058860e-02 4.532205e-02
Livelihood_Occupation_17 -5.990617e-01 -6.047094e-01 -6.096050e-01
(Intercept) 8.452709e-01 8.460998e-01 8.468207e-01
(Intercept) . . .
Number_Trips 5.609173e-03 6.203110e-03 6.719684e-03
Rent_per_Month 7.996488e-06 8.554852e-06 9.040484e-06
Monthly_Remittances 1.352848e-05 1.363554e-05 1.372865e-05
Wage_Last_Head 4.936385e-06 5.157347e-06 5.349529e-06
Duration_of_stay -1.017914e-04 -1.062700e-04 -1.101653e-04
Education_Level_3 -7.633529e-03 -1.426094e-02 -2.002512e-02
Education_Level_5 -3.114332e-02 -4.024639e-02 -4.816379e-02
Education_Level_6 -1.061635e-01 -1.181735e-01 -1.286192e-01
Education_Level_7 -1.082488e-01 -1.176326e-01 -1.257941e-01
Education_Level_8 -1.527949e-01 -1.615399e-01 -1.691460e-01
Livelihood_Occupation_7 3.468850e-02 3.773744e-02 4.038924e-02
Livelihood_Occupation_9 4.982723e-02 5.374642e-02 5.715514e-02
Livelihood_Occupation_17 -6.130089e-01 -6.159702e-01 -6.185458e-01
(Intercept) 8.474296e-01 8.479772e-01 8.484535e-01
(Intercept) . . .
Number_Trips 7.172152e-03 7.562525e-03 7.902034e-03
Rent_per_Month 9.471785e-06 9.838042e-06 1.015654e-05
Monthly_Remittances 1.381404e-05 1.388393e-05 1.394469e-05
Wage_Last_Head 5.514357e-06 5.660026e-06 5.786734e-06
Duration_of_stay -1.135459e-04 -1.164935e-04 -1.190571e-04
Education_Level_3 -2.502897e-02 -2.939059e-02 -3.318412e-02
Education_Level_5 -5.503740e-02 -6.102823e-02 -6.623876e-02
Education_Level_6 -1.376840e-01 -1.455883e-01 -1.524632e-01
Education_Level_7 -1.328756e-01 -1.390517e-01 -1.444234e-01
Education_Level_8 -1.757473e-01 -1.815027e-01 -1.865086e-01
Livelihood_Occupation_7 4.269522e-02 4.470127e-02 4.644603e-02
Livelihood_Occupation_9 6.011696e-02 6.269589e-02 6.493893e-02
Livelihood_Occupation_17 -6.207854e-01 -6.227338e-01 -6.244285e-01
(Intercept) 8.488679e-01 8.492282e-01 8.495219e-01
(Intercept) . . .
Number_Trips 8.197322e-03 8.454148e-03 8.680857e-03
Rent_per_Month 1.043355e-05 1.067448e-05 1.089327e-05
Monthly_Remittances 1.399754e-05 1.404350e-05 1.408814e-05
Wage_Last_Head 5.896938e-06 5.992788e-06 6.073496e-06
Duration_of_stay -1.212869e-04 -1.232262e-04 -1.249046e-04
Education_Level_3 -3.648354e-02 -3.935321e-02 -4.183611e-02
Education_Level_5 -7.077062e-02 -7.471220e-02 -7.812202e-02
Education_Level_6 -1.584427e-01 -1.636433e-01 -1.681410e-01
Education_Level_7 -1.490954e-01 -1.531589e-01 -1.566703e-01
Education_Level_8 -1.908624e-01 -1.946492e-01 -1.979233e-01
Livelihood_Occupation_7 4.796353e-02 4.928338e-02 5.043198e-02
Livelihood_Occupation_9 6.688982e-02 6.858660e-02 7.005996e-02
Livelihood_Occupation_17 -6.259024e-01 -6.271843e-01 -6.282995e-01
(Intercept) 8.497967e-01 8.500361e-01 8.502443e-01
(Intercept) . . .
Number_Trips 8.874761e-03 9.043351e-03 9.189982e-03
Rent_per_Month 1.107451e-05 1.123195e-05 1.136889e-05
Monthly_Remittances 1.412240e-05 1.415210e-05 1.417793e-05
Wage_Last_Head 6.146308e-06 6.209676e-06 6.264792e-06
Duration_of_stay -1.263726e-04 -1.276495e-04 -1.287601e-04
Education_Level_3 -4.400850e-02 -4.589803e-02 -4.754145e-02
Education_Level_5 -8.110596e-02 -8.370135e-02 -8.595869e-02
Education_Level_6 -1.720781e-01 -1.755027e-01 -1.784812e-01
Education_Level_7 -1.597469e-01 -1.624230e-01 -1.647506e-01
Education_Level_8 -2.007902e-01 -2.032838e-01 -2.054527e-01
Livelihood_Occupation_7 5.143028e-02 5.229858e-02 5.305378e-02
Livelihood_Occupation_9 7.134375e-02 7.246040e-02 7.343161e-02
Livelihood_Occupation_17 -6.292691e-01 -6.301125e-01 -6.308461e-01
(Intercept) 8.504036e-01 8.505627e-01 8.507022e-01
(Intercept) . . .
Number_Trips 9.320567e-03 9.431298e-03 9.527406e-03
Rent_per_Month 1.149749e-05 1.160047e-05 1.168943e-05
Monthly_Remittances 1.420556e-05 1.422473e-05 1.424112e-05
Wage_Last_Head 6.309466e-06 6.351432e-06 6.388078e-06
Duration_of_stay -1.297162e-04 -1.305572e-04 -1.312890e-04
Education_Level_3 -4.895078e-02 -5.019604e-02 -5.127963e-02
Education_Level_5 -8.789095e-02 -8.960193e-02 -9.109071e-02
Education_Level_6 -1.810358e-01 -1.832924e-01 -1.852562e-01
Education_Level_7 -1.667398e-01 -1.685041e-01 -1.700396e-01
Education_Level_8 -2.073084e-01 -2.089523e-01 -2.103828e-01
Livelihood_Occupation_7 5.371358e-02 5.428439e-02 5.478093e-02
Livelihood_Occupation_9 7.427502e-02 7.500963e-02 7.564879e-02
Livelihood_Occupation_17 -6.314857e-01 -6.320403e-01 -6.325228e-01
(Intercept) 8.508237e-01 8.509293e-01 8.510211e-01
(Intercept) . . .
Number_Trips 9.610988e-03 9.683683e-03 9.746909e-03
Rent_per_Month 1.176678e-05 1.183406e-05 1.189257e-05
Monthly_Remittances 1.425535e-05 1.426774e-05 1.427851e-05
Wage_Last_Head 6.419956e-06 6.447682e-06 6.471797e-06
Duration_of_stay -1.319256e-04 -1.324792e-04 -1.329607e-04
Education_Level_3 -5.222209e-02 -5.304180e-02 -5.375474e-02
Education_Level_5 -9.238558e-02 -9.351179e-02 -9.449131e-02
Education_Level_6 -1.869643e-01 -1.884500e-01 -1.897421e-01
Education_Level_7 -1.713750e-01 -1.725366e-01 -1.735468e-01
Education_Level_8 -2.116270e-01 -2.127092e-01 -2.136503e-01
Livelihood_Occupation_7 5.521281e-02 5.558843e-02 5.591513e-02
Livelihood_Occupation_9 7.620470e-02 7.668821e-02 7.710873e-02
Livelihood_Occupation_17 -6.329424e-01 -6.333073e-01 -6.336248e-01
(Intercept) 8.510868e-01 8.511556e-01 8.512177e-01
(Intercept) . . .
Number_Trips 9.802568e-03 9.850705e-03 9.892237e-03
Rent_per_Month 1.194909e-05 1.199382e-05 1.203168e-05
Monthly_Remittances 1.429164e-05 1.429991e-05 1.430657e-05
Wage_Last_Head 6.489905e-06 6.508167e-06 6.524365e-06
Duration_of_stay -1.333723e-04 -1.337365e-04 -1.340541e-04
Education_Level_3 -5.435032e-02 -5.489100e-02 -5.536294e-02
Education_Level_5 -9.530354e-02 -9.604709e-02 -9.669623e-02
Education_Level_6 -1.908274e-01 -1.918064e-01 -1.926611e-01
Education_Level_7 -1.743850e-01 -1.751513e-01 -1.758208e-01
Education_Level_8 -2.144332e-01 -2.151472e-01 -2.157706e-01
Livelihood_Occupation_7 5.620293e-02 5.644963e-02 5.666412e-02
Livelihood_Occupation_9 7.747426e-02 7.779204e-02 7.806872e-02
Livelihood_Occupation_17 -6.339050e-01 -6.341447e-01 -6.343530e-01
(Intercept) 8.512719e-01 8.513216e-01 8.513611e-01
(Intercept) . . .
Number_Trips 9.928307e-03 9.955940e-03 9.983034e-03
Rent_per_Month 1.206447e-05 1.209342e-05 1.211928e-05
Monthly_Remittances 1.431228e-05 1.431929e-05 1.432428e-05
Wage_Last_Head 6.538488e-06 6.547277e-06 6.557589e-06
Duration_of_stay -1.343305e-04 -1.345670e-04 -1.347758e-04
Education_Level_3 -5.577353e-02 -5.607983e-02 -5.638763e-02
Education_Level_5 -9.726097e-02 -9.768335e-02 -9.810613e-02
Education_Level_6 -1.934047e-01 -1.939936e-01 -1.945516e-01
Education_Level_7 -1.764033e-01 -1.768507e-01 -1.772870e-01
Education_Level_8 -2.163131e-01 -2.167341e-01 -2.171408e-01
Livelihood_Occupation_7 5.685070e-02 5.700403e-02 5.714483e-02
Livelihood_Occupation_9 7.830943e-02 7.850898e-02 7.869021e-02
Livelihood_Occupation_17 -6.345342e-01 -6.347100e-01 -6.348476e-01
(Intercept) 8.513960e-01 8.514268e-01 8.514537e-01
(Intercept) . . .
Number_Trips 1.000725e-02 1.002837e-02 1.004672e-02
Rent_per_Month 1.214111e-05 1.215978e-05 1.217591e-05
Monthly_Remittances 1.432796e-05 1.433097e-05 1.433352e-05
Wage_Last_Head 6.567189e-06 6.575689e-06 6.583119e-06
Duration_of_stay -1.349578e-04 -1.351164e-04 -1.352544e-04
Education_Level_3 -5.666297e-02 -5.690391e-02 -5.711375e-02
Education_Level_5 -9.848474e-02 -9.881610e-02 -9.910466e-02
Education_Level_6 -1.950464e-01 -1.954788e-01 -1.958553e-01
Education_Level_7 -1.776761e-01 -1.780165e-01 -1.783129e-01
Education_Level_8 -2.175027e-01 -2.178192e-01 -2.180947e-01
Livelihood_Occupation_7 5.726855e-02 5.737637e-02 5.747018e-02
Livelihood_Occupation_9 7.884956e-02 7.898848e-02 7.910938e-02
Livelihood_Occupation_17 -6.349648e-01 -6.350664e-01 -6.351547e-01
# evalusation function
evaluate_model <- function(model, actual, predicted_prob, threshold = 0.5, plot_roc = TRUE, show_summary = FALSE) {
predicted_classes <- ifelse(predicted_prob > threshold, 1, 0)
confusion <- confusionMatrix(as.factor(predicted_classes), as.factor(actual), positive = "1")
roc_result <- roc(actual, predicted_prob)
auc_value <- auc(roc_result)
if (plot_roc) {
plot(roc_result, main = "ROC Curve")
abline(a = 0, b = 1, col = "red") # Adding a reference line
text(x = 0.8, y = 0.2, labels = paste("AUC =", round(auc_value, 2)))
}
# Optionally print model summary
if (show_summary && !is.null(model)) {
print(summary(model))
}
list(
Accuracy = confusion$overall['Accuracy'],
Precision = confusion$byClass['Precision'],
Recall = confusion$byClass['Sensitivity'],
Specificity = confusion$byClass['Specificity'],
AUC = auc_value,
ROC = roc_result
)
}
#set.seed(123) # for reproducibility
#train_idx <- createDataPartition(df1$Work_Earn_Money_1, p = 0.8, list = FALSE)
#train_set <- df1[train_idx, ]
#test_set <- df1[-train_idx, ]Elastic
# Train Elastic Net regression model
elastic.mod <- glmnet(x[train, ], y[train], alpha = 0.5, lambda.min.ratio = 0.000001)
# Cross-validation for selecting lambda
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 0.5, lambda.min.ratio = 0.000001)
# Find lambda that minimizes training MSE
bestlam <- cv.out$lambda.min
# Predict the test data with the best lambda
elastic.pred <- predict(elastic.mod, s = bestlam, newx = x[test, ])
# Assuming predictions are probabilities, convert them to binary classes
predicted_classes <- ifelse(elastic.pred > 0.5, 1, 0)
# Confusion matrix
conf_matrix <- table(predicted_classes, y[test])
# Calculate Sensitivity (True Positive Rate)
sensitivity <- conf_matrix[2, 2] / sum(conf_matrix[2, ])
# Calculate Specificity (True Negative Rate)
specificity <- conf_matrix[1, 1] / sum(conf_matrix[1, ])
# Calculate Accuracy
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
# Calculate Precision (Positive Predictive Value)
precision <- conf_matrix[2, 2] / sum(conf_matrix[, 2])
# Calculate Recall (Same as Sensitivity)
recall <- sensitivity
# Calculate False Positive Rate
fpr <- 1 - specificity
F1 <- 2 * (precision * recall) / (precision + recall)
# Create ROC Curve
roc_curve_elastic <- roc(y[test], elastic.pred)
# Calculate AUC (Area Under Curve)
auc_elastic <- round(auc(roc_curve_elastic), 2)
roc_curve <- roc(y[test], elastic.pred)
# Calculate AUC (Area Under Curve)
auc_value <- auc(roc_curve)
# Print the metrics
print("Metrics:")
print(paste("Sensitivity (True Positive Rate):", sensitivity))
print(paste("Specificity (True Negative Rate):", specificity))
print(paste("Accuracy:", accuracy))
print(paste("Precision (Positive Predictive Value):", precision))
print(paste("Recall:", recall))
print(paste("False Positive Rate:", fpr))
print(paste("AUC (Area Under Curve):", auc_elastic))
print(paste("FI", F1))
# Plot ROC curve
plot(roc_curve_lasso, main = "ROC Curve", col = "blue", lwd = 2)
plot(roc_curve_elastic, main = "ROC Curve", col = "green", add=TRUE)
# Add a legend
legend("bottomright", legend = c("Curve 1", "Curve 2", "Curve 3"),
col = c("blue", "red", "green"), lty = 1, lwd = 2)
print(paste("AUC (Area Under Curve):", auc_value))
# Plot ROC curve
plot(roc_curve, main = "ROC Curve", col = "blue")
coef(elastic.mod)Setting levels: control = 0, case = 1
Warning message in roc.default(y[test], elastic.pred):
"Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector."
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Warning message in roc.default(y[test], elastic.pred):
"Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector."
Setting direction: controls < cases
[[ suppressing 48 column names 's0', 's1', 's2' ... ]]
[1] "Metrics:"
[1] "Sensitivity (True Positive Rate): 0.917808219178082"
[1] "Specificity (True Negative Rate): 0.674418604651163"
[1] "Accuracy: 0.892156862745098"
[1] "Precision (Positive Predictive Value): 0.959885386819484"
[1] "Recall: 0.917808219178082"
[1] "False Positive Rate: 0.325581395348837"
[1] "AUC (Area Under Curve): 0.89"
[1] "FI 0.938375350140056"
[1] "AUC (Area Under Curve): 0.88514399494925"
15 x 48 sparse Matrix of class "dgCMatrix"
(Intercept) 0.8385678 0.84285047 0.8468891 0.8506682 0.8541789
(Intercept) . . . . .
Number_Trips . . . . .
Rent_per_Month . . . . .
Monthly_Remittances . . . . .
Wage_Last_Head . . . . .
Duration_of_stay . . . . .
Education_Level_3 . . . . .
Education_Level_5 . . . . .
Education_Level_6 . . . . .
Education_Level_7 . . . . .
Education_Level_8 . . . . .
Livelihood_Occupation_7 . . . . .
Livelihood_Occupation_9 . . . . .
Livelihood_Occupation_17 . -0.06371919 -0.1238083 -0.1800357 -0.2322686
(Intercept) 0.8574181 8.566038e-01 8.537586e-01 8.512003e-01
(Intercept) . . . .
Number_Trips . . . .
Rent_per_Month . . . .
Monthly_Remittances . 1.520649e-06 3.748323e-06 5.756193e-06
Wage_Last_Head . . . .
Duration_of_stay . . . .
Education_Level_3 . . . .
Education_Level_5 . . . .
Education_Level_6 . . . .
Education_Level_7 . . . .
Education_Level_8 . . . .
Livelihood_Occupation_7 . . . .
Livelihood_Occupation_9 . . . .
Livelihood_Occupation_17 -0.2804641 -3.222402e-01 -3.588579e-01 -3.919535e-01
(Intercept) 8.489083e-01 8.453661e-01 8.429289e-01
(Intercept) . . .
Number_Trips . . .
Rent_per_Month . . .
Monthly_Remittances 7.558714e-06 8.909284e-06 9.954236e-06
Wage_Last_Head . 2.906249e-07 7.487624e-07
Duration_of_stay . . .
Education_Level_3 . . .
Education_Level_5 . . .
Education_Level_6 . . .
Education_Level_7 . . .
Education_Level_8 . . -9.110655e-03
Livelihood_Occupation_7 . . .
Livelihood_Occupation_9 . . .
Livelihood_Occupation_17 -4.217347e-01 -4.473193e-01 -4.705898e-01
(Intercept) 8.435503e-01 8.450980e-01 8.460103e-01
(Intercept) . . .
Number_Trips . . .
Rent_per_Month . . 1.334076e-06
Monthly_Remittances 1.072599e-05 1.136842e-05 1.192010e-05
Wage_Last_Head 1.358990e-06 1.936655e-06 2.403417e-06
Duration_of_stay -1.049434e-05 -2.594694e-05 -4.033518e-05
Education_Level_3 . . .
Education_Level_5 . . .
Education_Level_6 . . .
Education_Level_7 . . -9.930477e-03
Education_Level_8 -2.758964e-02 -4.436355e-02 -6.065584e-02
Livelihood_Occupation_7 . . .
Livelihood_Occupation_9 . . 4.181405e-03
Livelihood_Occupation_17 -4.923683e-01 -5.119171e-01 -5.292203e-01
(Intercept) 8.447097e-01 8.433453e-01 8.422186e-01
(Intercept) . . .
Number_Trips 9.540962e-04 1.909808e-03 2.740899e-03
Rent_per_Month 2.776449e-06 3.968225e-06 4.997049e-06
Monthly_Remittances 1.229839e-05 1.260150e-05 1.285199e-05
Wage_Last_Head 2.804196e-06 3.186687e-06 3.531661e-06
Duration_of_stay -5.212749e-05 -6.226260e-05 -7.123564e-05
Education_Level_3 . . .
Education_Level_5 . . .
Education_Level_6 -1.139670e-03 -2.238388e-02 -4.119230e-02
Education_Level_7 -2.689973e-02 -4.223122e-02 -5.584779e-02
Education_Level_8 -7.573697e-02 -8.945260e-02 -1.016697e-01
Livelihood_Occupation_7 4.141180e-03 1.208598e-02 1.905108e-02
Livelihood_Occupation_9 1.353155e-02 2.225329e-02 2.992357e-02
Livelihood_Occupation_17 -5.438599e-01 -5.561587e-01 -5.670003e-01
(Intercept) 8.412689e-01 8.409980e-01 8.411810e-01
(Intercept) . . .
Number_Trips 3.466776e-03 4.243041e-03 5.032886e-03
Rent_per_Month 5.895048e-06 6.690553e-06 7.391334e-06
Monthly_Remittances 1.306387e-05 1.323703e-05 1.337345e-05
Wage_Last_Head 3.839726e-06 4.145732e-06 4.444646e-06
Duration_of_stay -7.915813e-05 -8.621658e-05 -9.248846e-05
Education_Level_3 . . .
Education_Level_5 . -6.516601e-03 -1.704901e-02
Education_Level_6 -5.779952e-02 -7.334890e-02 -8.777688e-02
Education_Level_7 -6.790836e-02 -7.969748e-02 -9.102223e-02
Education_Level_8 -1.125215e-01 -1.234628e-01 -1.341937e-01
Livelihood_Occupation_7 2.514689e-02 2.979341e-02 3.331620e-02
Livelihood_Occupation_9 3.665474e-02 4.215064e-02 4.664379e-02
Livelihood_Occupation_17 -5.765386e-01 -5.848634e-01 -5.921313e-01
(Intercept) 8.423439e-01 8.434214e-01 8.443943e-01
(Intercept) . . .
Number_Trips 5.686318e-03 6.258949e-03 6.758822e-03
Rent_per_Month 8.023419e-06 8.582501e-06 9.064470e-06
Monthly_Remittances 1.351203e-05 1.363386e-05 1.373541e-05
Wage_Last_Head 4.709414e-06 4.944135e-06 5.152138e-06
Duration_of_stay -9.798455e-05 -1.028093e-04 -1.070414e-04
Education_Level_3 -6.980583e-03 -1.343157e-02 -1.909890e-02
Education_Level_5 -2.735034e-02 -3.651136e-02 -4.457254e-02
Education_Level_6 -1.015176e-01 -1.136663e-01 -1.243387e-01
Education_Level_7 -1.019935e-01 -1.117352e-01 -1.203088e-01
Education_Level_8 -1.446575e-01 -1.539685e-01 -1.621755e-01
Livelihood_Occupation_7 3.657910e-02 3.941246e-02 4.187202e-02
Livelihood_Occupation_9 5.092117e-02 5.466304e-02 5.792805e-02
Livelihood_Occupation_17 -5.977407e-01 -6.026065e-01 -6.068614e-01
(Intercept) 8.452458e-01 8.460204e-01 8.467076e-01
(Intercept) . . .
Number_Trips 7.198841e-03 7.580393e-03 7.913531e-03
Rent_per_Month 9.492315e-06 9.856275e-06 1.017277e-05
Monthly_Remittances 1.382607e-05 1.389940e-05 1.396200e-05
Wage_Last_Head 5.333676e-06 5.495776e-06 5.638434e-06
Duration_of_stay -1.107425e-04 -1.139886e-04 -1.168277e-04
Education_Level_3 -2.406772e-02 -2.843473e-02 -3.226138e-02
Education_Level_5 -5.165323e-02 -5.788396e-02 -6.335066e-02
Education_Level_6 -1.336874e-01 -1.419013e-01 -1.490954e-01
Education_Level_7 -1.278340e-01 -1.344578e-01 -1.402683e-01
Education_Level_8 -1.693915e-01 -1.757486e-01 -1.813311e-01
Livelihood_Occupation_7 4.400508e-02 4.585645e-02 4.746316e-02
Livelihood_Occupation_9 6.077111e-02 6.325125e-02 6.541180e-02
Livelihood_Occupation_17 -6.105762e-01 -6.138190e-01 -6.166480e-01
(Intercept) 8.473156e-01 8.478525e-01 8.483255e-01
(Intercept) . . .
Number_Trips 8.204260e-03 8.457866e-03 8.679002e-03
Rent_per_Month 1.044800e-05 1.068733e-05 1.089544e-05
Monthly_Remittances 1.401556e-05 1.406144e-05 1.410082e-05
Wage_Last_Head 5.763791e-06 5.873804e-06 5.970242e-06
Duration_of_stay -1.193091e-04 -1.214766e-04 -1.233688e-04
Education_Level_3 -3.561137e-02 -3.854165e-02 -4.110294e-02
Education_Level_5 -6.814173e-02 -7.233662e-02 -7.600641e-02
Education_Level_6 -1.553908e-01 -1.608955e-01 -1.657055e-01
Education_Level_7 -1.453598e-01 -1.498171e-01 -1.537160e-01
Education_Level_8 -1.862274e-01 -1.905174e-01 -1.942725e-01
Livelihood_Occupation_7 4.885780e-02 5.006860e-02 5.111999e-02
Livelihood_Occupation_9 6.729349e-02 6.893200e-02 7.035851e-02
Livelihood_Occupation_17 -6.191149e-01 -6.212654e-01 -6.231396e-01
(Intercept) 8.487229e-01 8.490903e-01 8.494129e-01
(Intercept) . . .
Number_Trips 8.874946e-03 9.042587e-03 9.188581e-03
Rent_per_Month 1.108526e-05 1.124170e-05 1.137756e-05
Monthly_Remittances 1.413911e-05 1.416777e-05 1.419236e-05
Wage_Last_Head 6.052211e-06 6.126370e-06 6.191246e-06
Duration_of_stay -1.250121e-04 -1.264531e-04 -1.277096e-04
Education_Level_3 -4.332840e-02 -4.528305e-02 -4.698882e-02
Education_Level_5 -7.919798e-02 -8.200242e-02 -8.445113e-02
Education_Level_6 -1.698826e-01 -1.735516e-01 -1.767529e-01
Education_Level_7 -1.571033e-01 -1.600825e-01 -1.626837e-01
Education_Level_8 -1.975395e-01 -2.004119e-01 -2.029210e-01
Livelihood_Occupation_7 5.203348e-02 5.282657e-02 5.351559e-02
Livelihood_Occupation_9 7.159783e-02 7.267890e-02 7.361982e-02
Livelihood_Occupation_17 -6.247726e-01 -6.261949e-01 -6.274336e-01
(Intercept) 8.496956e-01 8.499211e-01 8.501391e-01
(Intercept) . . .
Number_Trips 9.315737e-03 9.429685e-03 9.525889e-03
Rent_per_Month 1.149568e-05 1.160810e-05 1.169678e-05
Monthly_Remittances 1.421356e-05 1.423706e-05 1.425250e-05
Wage_Last_Head 6.247933e-06 6.294207e-06 6.337676e-06
Duration_of_stay -1.288049e-04 -1.297496e-04 -1.305818e-04
Education_Level_3 -4.847669e-02 -4.975475e-02 -5.088750e-02
Education_Level_5 -8.658810e-02 -8.842290e-02 -9.005144e-02
Education_Level_6 -1.795448e-01 -1.819439e-01 -1.840684e-01
Education_Level_7 -1.649537e-01 -1.669001e-01 -1.686298e-01
Education_Level_8 -2.051115e-01 -2.069938e-01 -2.086638e-01
Livelihood_Occupation_7 5.411426e-02 5.463697e-02 5.508867e-02
Livelihood_Occupation_9 7.443863e-02 7.514952e-02 7.576945e-02
Livelihood_Occupation_17 -6.285122e-01 -6.294528e-01 -6.302700e-01
(Intercept) 8.503309e-01 8.504984e-01 8.506446e-01
(Intercept) . . .
Number_Trips 9.609447e-03 9.682170e-03 9.745463e-03
Rent_per_Month 1.177330e-05 1.183981e-05 1.189763e-05
Monthly_Remittances 1.426555e-05 1.427682e-05 1.428658e-05
Wage_Last_Head 6.375738e-06 6.408939e-06 6.437883e-06
Duration_of_stay -1.313072e-04 -1.319388e-04 -1.324888e-04
Education_Level_3 -5.187508e-02 -5.273548e-02 -5.348490e-02
Education_Level_5 -9.147163e-02 -9.270928e-02 -9.378756e-02
Education_Level_6 -1.859207e-01 -1.875343e-01 -1.889396e-01
Education_Level_7 -1.701384e-01 -1.714531e-01 -1.725985e-01
Education_Level_8 -2.101205e-01 -2.113903e-01 -2.124967e-01
Livelihood_Occupation_7 5.548135e-02 5.582267e-02 5.611937e-02
Livelihood_Occupation_9 7.630904e-02 7.677851e-02 7.718694e-02
Livelihood_Occupation_17 -6.309813e-01 -6.316005e-01 -6.321392e-01
(Intercept) 8.507722e-01 8.508681e-01 8.509645e-01
(Intercept) . . .
Number_Trips 9.800543e-03 9.849588e-03 9.891570e-03
Rent_per_Month 1.194791e-05 1.199788e-05 1.203634e-05
Monthly_Remittances 1.429502e-05 1.430631e-05 1.431276e-05
Wage_Last_Head 6.463111e-06 6.482141e-06 6.501312e-06
Duration_of_stay -1.329677e-04 -1.333768e-04 -1.337395e-04
Education_Level_3 -5.413753e-02 -5.468170e-02 -5.517774e-02
Education_Level_5 -9.472680e-02 -9.550615e-02 -9.622085e-02
Education_Level_6 -1.901634e-01 -1.911905e-01 -1.921196e-01
Education_Level_7 -1.735961e-01 -1.744250e-01 -1.751836e-01
Education_Level_8 -2.134607e-01 -2.142651e-01 -2.149982e-01
Livelihood_Occupation_7 5.637731e-02 5.660501e-02 5.679953e-02
Livelihood_Occupation_9 7.754226e-02 7.785087e-02 7.811942e-02
Livelihood_Occupation_17 -6.326081e-01 -6.330200e-01 -6.333745e-01
(Intercept) 8.510509e-01 8.511264e-01 8.511910e-01
(Intercept) . . .
Number_Trips 9.927741e-03 9.959156e-03 9.983752e-03
Rent_per_Month 1.206869e-05 1.209669e-05 1.212253e-05
Monthly_Remittances 1.431780e-05 1.432210e-05 1.432828e-05
Wage_Last_Head 6.518327e-06 6.533178e-06 6.542486e-06
Duration_of_stay -1.340561e-04 -1.343317e-04 -1.345662e-04
Education_Level_3 -5.561117e-02 -5.598856e-02 -5.626815e-02
Education_Level_5 -9.684542e-02 -9.738925e-02 -9.779536e-02
Education_Level_6 -1.929314e-01 -1.936382e-01 -1.941958e-01
Education_Level_7 -1.758468e-01 -1.764243e-01 -1.768678e-01
Education_Level_8 -2.156389e-01 -2.161968e-01 -2.166313e-01
Livelihood_Occupation_7 5.696861e-02 5.711567e-02 5.723680e-02
Livelihood_Occupation_9 7.835334e-02 7.855689e-02 7.872565e-02
Livelihood_Occupation_17 -6.336828e-01 -6.339511e-01 -6.342011e-01
(Intercept) 8.512464e-01 8.512957e-01 8.513393e-01
(Intercept) . . .
Number_Trips 1.000748e-02 1.002857e-02 1.004692e-02
Rent_per_Month 1.214496e-05 1.216355e-05 1.217935e-05
Monthly_Remittances 1.433225e-05 1.433496e-05 1.433708e-05
Wage_Last_Head 6.553335e-06 6.563446e-06 6.572408e-06
Duration_of_stay -1.347744e-04 -1.349563e-04 -1.351149e-04
Education_Level_3 -5.655114e-02 -5.680482e-02 -5.702699e-02
Education_Level_5 -9.820309e-02 -9.856851e-02 -9.888847e-02
Education_Level_6 -1.947264e-01 -1.951977e-01 -1.956099e-01
Education_Level_7 -1.773010e-01 -1.776876e-01 -1.780260e-01
Education_Level_8 -2.170504e-01 -2.174234e-01 -2.177496e-01
Livelihood_Occupation_7 5.734755e-02 5.744495e-02 5.752984e-02
Livelihood_Occupation_9 7.887872e-02 7.901348e-02 7.913103e-02
Livelihood_Occupation_17 -6.344046e-01 -6.345794e-01 -6.347311e-01
# Calculate the test MSE
elastic_mse <- mean((elastic.pred - y[test])^2)
# Print and report test error
print("Report Test Error:")
cat("Elastic Net MSE:", elastic_mse, "\n")
print("Elastic Net Regression combines Lasso and Ridge regularization.")
# Plot of non-zero coefficient estimates
plot(elastic.mod, xvar = "lambda")
# Get final model with selected lambda
elastic.mod.final <- glmnet(x[train, ], y[train], alpha = 0.5, lambda = bestlam)
# Sparse matrix[1] "Report Test Error:"
Elastic Net MSE: 0.08502231
[1] "Elastic Net Regression combines Lasso and Ridge regularization."
From Line 487 in feature_select_r_sheeba.qmd
Note about Feature Selection
Feature selection was performed on logistic regression model. Then the selected features were used for Ridge, Lasso, and Elastic. Feature selection was not repeated on ensemble methods to see if ensemble methods prioritized different features in the models.
Decision Tree
set.seed(123) # for reproducibility
train_idx <- createDataPartition(df1$Work_Earn_Money_1, p = 0.8, list = FALSE)
train <- df1[train_idx,]
test <- df1[-train_idx,]
tree_model <- rpart(Work_Earn_Money_1 ~ ., data = train, method = "class")
library(rpart.plot)
rpart.plot(tree_model, main="Decision Tree Model", extra=102) # extra=102 to show node numbers and splits
predictions_prob <- predict(tree_model, newdata = test, type = "prob")
results <- evaluate_model(tree_model, test$Work_Earn_Money_1, predictions_prob[,2], plot_roc = TRUE, show_summary = TRUE)
print(results)Setting levels: control = 0, case = 1
Setting direction: controls < cases
Call:
rpart(formula = Work_Earn_Money_1 ~ ., data = train, method = "class")
n= 1600
CP nsplit rel error xerror xstd
1 0.59760956 0 1.0000000 1.0000000 0.05795743
2 0.01394422 1 0.4023904 0.4063745 0.03893340
3 0.01000000 3 0.3745020 0.4262948 0.03980962
Variable importance
Paid_in_Taka Livelihood_Occupation_17 Food_budget
72 16 6
Wage_Last_Head Wage_First_Head Duration_of_stay
3 3 1
Node number 1: 1600 observations, complexity param=0.5976096
predicted class=1 expected loss=0.156875 P(node) =1
class counts: 251 1349
probabilities: 0.157 0.843
left son=2 (210 obs) right son=3 (1390 obs)
Primary splits:
Paid_in_Taka < 1 to the left, improve=237.07340, (0 missing)
Livelihood_Occupation_17 < 0.5 to the right, improve= 92.04358, (0 missing)
Food_budget < 1.5 to the left, improve= 79.03357, (0 missing)
Monthly_Remittances < 1 to the left, improve= 67.25052, (0 missing)
Monthly_Savings < 1.5 to the left, improve= 38.83325, (0 missing)
Surrogate splits:
Livelihood_Occupation_17 < 0.5 to the right, agree=0.898, adj=0.224, (0 split)
Food_budget < 1.5 to the left, agree=0.879, adj=0.081, (0 split)
Node number 2: 210 observations, complexity param=0.01394422
predicted class=0 expected loss=0.1428571 P(node) =0.13125
class counts: 180 30
probabilities: 0.857 0.143
left son=4 (186 obs) right son=5 (24 obs)
Primary splits:
Wage_Last_Head < 750 to the left, improve=8.619432, (0 missing)
Wage_First_Head < 450 to the left, improve=7.569876, (0 missing)
Education_Level_2 < 0.5 to the left, improve=3.752896, (0 missing)
Livelihood_Occupation_17 < 0.5 to the right, improve=3.038709, (0 missing)
Livelihood_Occupation_2 < 0.5 to the left, improve=2.814749, (0 missing)
Surrogate splits:
Wage_First_Head < 450 to the left, agree=0.981, adj=0.833, (0 split)
Node number 3: 1390 observations
predicted class=1 expected loss=0.05107914 P(node) =0.86875
class counts: 71 1319
probabilities: 0.051 0.949
Node number 4: 186 observations
predicted class=0 expected loss=0.09139785 P(node) =0.11625
class counts: 169 17
probabilities: 0.909 0.091
Node number 5: 24 observations, complexity param=0.01394422
predicted class=1 expected loss=0.4583333 P(node) =0.015
class counts: 11 13
probabilities: 0.458 0.542
left son=10 (7 obs) right son=11 (17 obs)
Primary splits:
Duration_of_stay < 102 to the right, improve=3.1435570, (0 missing)
Wage_First_Head < 6500 to the right, improve=2.0416670, (0 missing)
Wage_Last_Head < 9500 to the right, improve=2.0416670, (0 missing)
Age_First_Marriage < 20 to the left, improve=0.3082751, (0 missing)
Livelihood_Occupation_17 < 0.5 to the right, improve=0.2528011, (0 missing)
Surrogate splits:
Wage_First_Head < 8500 to the right, agree=0.875, adj=0.571, (0 split)
Wage_Last_Head < 9500 to the right, agree=0.792, adj=0.286, (0 split)
Livelihood_Occupation_10 < 0.5 to the right, agree=0.750, adj=0.143, (0 split)
Node number 10: 7 observations
predicted class=0 expected loss=0.1428571 P(node) =0.004375
class counts: 6 1
probabilities: 0.857 0.143
Node number 11: 17 observations
predicted class=1 expected loss=0.2941176 P(node) =0.010625
class counts: 5 12
probabilities: 0.294 0.706
n= 1600
node), split, n, loss, yval, (yprob)
* denotes terminal node
1) root 1600 251 1 (0.15687500 0.84312500)
2) Paid_in_Taka< 1 210 30 0 (0.85714286 0.14285714)
4) Wage_Last_Head< 750 186 17 0 (0.90860215 0.09139785) *
5) Wage_Last_Head>=750 24 11 1 (0.45833333 0.54166667)
10) Duration_of_stay>=102 7 1 0 (0.85714286 0.14285714) *
11) Duration_of_stay< 102 17 5 1 (0.29411765 0.70588235) *
3) Paid_in_Taka>=1 1390 71 1 (0.05107914 0.94892086) *
$Accuracy
Accuracy
0.9425
$Precision
Precision
0.9382022
$Recall
Sensitivity
0.9970149
$Specificity
Specificity
0.6615385
$AUC
Area under the curve: 0.8354
$ROC
Call:
roc.default(response = actual, predictor = predicted_prob)
Data: predicted_prob in 65 controls (actual 0) < 335 cases (actual 1).
Area under the curve: 0.8354
Decision Tree (Hyperparameter Tuned)
control <- rpart.control(minsplit = 20, minbucket = 7, maxdepth=30)
fit <- rpart(Work_Earn_Money_1 ~ ., data=train, method="class", control=control)Decision Tree Plot
rpart.plot(fit, main="Decision Tree Model (Hyperparameter Tuning)", extra=102) # extra=102 to show node numbers and splits
predictions_prob_rf <- predict(fit, newdata = test, type = "prob")
results <- evaluate_model(fit, test$Work_Earn_Money_1, predictions_prob_rf[,2], plot_roc = TRUE, show_summary = TRUE)
roc_curve_decision <- roc(test$Work_Earn_Money_1, predictions_prob_rf[, 2])
# Save AUC
auc_decision <- round(results$Accuracy, 2)
# Plot ROC curve
plot(roc_curve_lasso, main = "ROC Curve", col = "blue", lwd = 2)
plot(roc_curve_elastic, main = "ROC Curve", col = "green", add=TRUE)
plot(roc_curve_decision, main = "ROC Curve", col = "red", add=TRUE)
# Add a legend
legend("bottomright", legend = c("Curve 1", "Curve 2", "Curve 3"),
col = c("blue", "red", "green"), lty = 1, lwd = 2)Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Call:
rpart(formula = Work_Earn_Money_1 ~ ., data = train, method = "class",
control = control)
n= 1600
CP nsplit rel error xerror xstd
1 0.59760956 0 1.0000000 1.0000000 0.05795743
2 0.01394422 1 0.4023904 0.4023904 0.03875500
3 0.01000000 3 0.3745020 0.4143426 0.03928699
Variable importance
Paid_in_Taka Livelihood_Occupation_17 Food_budget
72 16 6
Wage_Last_Head Wage_First_Head Duration_of_stay
3 3 1
Node number 1: 1600 observations, complexity param=0.5976096
predicted class=1 expected loss=0.156875 P(node) =1
class counts: 251 1349
probabilities: 0.157 0.843
left son=2 (210 obs) right son=3 (1390 obs)
Primary splits:
Paid_in_Taka < 1 to the left, improve=237.07340, (0 missing)
Livelihood_Occupation_17 < 0.5 to the right, improve= 92.04358, (0 missing)
Food_budget < 1.5 to the left, improve= 79.03357, (0 missing)
Monthly_Remittances < 1 to the left, improve= 67.25052, (0 missing)
Monthly_Savings < 1.5 to the left, improve= 38.83325, (0 missing)
Surrogate splits:
Livelihood_Occupation_17 < 0.5 to the right, agree=0.898, adj=0.224, (0 split)
Food_budget < 1.5 to the left, agree=0.879, adj=0.081, (0 split)
Node number 2: 210 observations, complexity param=0.01394422
predicted class=0 expected loss=0.1428571 P(node) =0.13125
class counts: 180 30
probabilities: 0.857 0.143
left son=4 (186 obs) right son=5 (24 obs)
Primary splits:
Wage_Last_Head < 750 to the left, improve=8.619432, (0 missing)
Wage_First_Head < 450 to the left, improve=7.569876, (0 missing)
Education_Level_2 < 0.5 to the left, improve=3.752896, (0 missing)
Livelihood_Occupation_17 < 0.5 to the right, improve=3.038709, (0 missing)
Livelihood_Occupation_2 < 0.5 to the left, improve=2.814749, (0 missing)
Surrogate splits:
Wage_First_Head < 450 to the left, agree=0.981, adj=0.833, (0 split)
Node number 3: 1390 observations
predicted class=1 expected loss=0.05107914 P(node) =0.86875
class counts: 71 1319
probabilities: 0.051 0.949
Node number 4: 186 observations
predicted class=0 expected loss=0.09139785 P(node) =0.11625
class counts: 169 17
probabilities: 0.909 0.091
Node number 5: 24 observations, complexity param=0.01394422
predicted class=1 expected loss=0.4583333 P(node) =0.015
class counts: 11 13
probabilities: 0.458 0.542
left son=10 (7 obs) right son=11 (17 obs)
Primary splits:
Duration_of_stay < 102 to the right, improve=3.1435570, (0 missing)
Wage_First_Head < 6500 to the right, improve=2.0416670, (0 missing)
Wage_Last_Head < 9500 to the right, improve=2.0416670, (0 missing)
Age_First_Marriage < 20 to the left, improve=0.3082751, (0 missing)
Livelihood_Occupation_17 < 0.5 to the right, improve=0.2528011, (0 missing)
Surrogate splits:
Wage_First_Head < 8500 to the right, agree=0.875, adj=0.571, (0 split)
Wage_Last_Head < 9500 to the right, agree=0.792, adj=0.286, (0 split)
Livelihood_Occupation_10 < 0.5 to the right, agree=0.750, adj=0.143, (0 split)
Node number 10: 7 observations
predicted class=0 expected loss=0.1428571 P(node) =0.004375
class counts: 6 1
probabilities: 0.857 0.143
Node number 11: 17 observations
predicted class=1 expected loss=0.2941176 P(node) =0.010625
class counts: 5 12
probabilities: 0.294 0.706
n= 1600
node), split, n, loss, yval, (yprob)
* denotes terminal node
1) root 1600 251 1 (0.15687500 0.84312500)
2) Paid_in_Taka< 1 210 30 0 (0.85714286 0.14285714)
4) Wage_Last_Head< 750 186 17 0 (0.90860215 0.09139785) *
5) Wage_Last_Head>=750 24 11 1 (0.45833333 0.54166667)
10) Duration_of_stay>=102 7 1 0 (0.85714286 0.14285714) *
11) Duration_of_stay< 102 17 5 1 (0.29411765 0.70588235) *
3) Paid_in_Taka>=1 1390 71 1 (0.05107914 0.94892086) *
Decision Tree with Hyperparameter tuning results
specificity <- results$Specificity
accuracy <- results$Accuracy
sensitivities <- results$Recall
precision <- results$Precision
print(specificity)
print(accuracy)
print(sensitivities)
print(precision)Specificity
0.6615385
Accuracy
0.9425
Sensitivity
0.9970149
Precision
0.9382022
Random Forest
train_idx <- createDataPartition(df1$Work_Earn_Money_1, p = 0.8, list = FALSE)
train <- df1[train_idx,]
test <- df1[-train_idx,]
train$Work_Earn_Money_1 <- factor(train$Work_Earn_Money_1)
test$Work_Earn_Money_1 <- factor(test$Work_Earn_Money_1)
rf <- randomForest(Work_Earn_Money_1~., data=train, type='classification', proximity=TRUE, importance=TRUE)
print(rf)
rf_predict <- predict(rf, newdata=test, type='prob')
Call:
randomForest(formula = Work_Earn_Money_1 ~ ., data = train, type = "classification", proximity = TRUE, importance = TRUE)
Type of random forest: classification
Number of trees: 500
No. of variables tried at each split: 7
OOB estimate of error rate: 5.94%
Confusion matrix:
0 1 class.error
0 179 73 0.28968254
1 22 1326 0.01632047
# with mtry
TN <- 173
FP <- 80
FN <- 16
TP <- 1331
# Calculate metrics
accuracy <- (TP + TN) / (TP + TN + FP + FN)
precision <- TP / (TP + FP)
recall <- TP / (TP + FN) # Also known as sensitivity
specificity <- TN / (TN + FP)
# Calculate recall (also called sensitivity)
recall <- TP / (TP + FN)
# Print the results
cat(sprintf("Accuracy: %f\n", accuracy))
cat(sprintf("Precision: %f\n", precision))
cat(sprintf("Recall (Sensitivity): %f\n", recall))
cat(sprintf("Specificity: %f\n", specificity))
cat(sprintf("Sensitivity: %f\n", recall))Accuracy: 0.940000
Precision: 0.943303
Recall (Sensitivity): 0.988122
Specificity: 0.683794
Sensitivity: 0.988122
# Extracting the probabilities of the positive class (assuming it's the first class)
prob_positive_class <- rf_predict[, "1"]
# Compute ROC curve
roc_curve_random <- roc(test$Work_Earn_Money_1, prob_positive_class)
# Save AUC
auc_random <- round(auc(roc_curve_random), 2)
# Plot ROC curve
plot(roc_curve_lasso, main = "ROC Curve", col = "blue", lwd = 2)
plot(roc_curve_elastic, main = "ROC Curve", col = "green", add=TRUE)
plot(roc_curve_decision, main = "ROC Curve", col = "red", add=TRUE)
plot(roc_curve_random, main = "ROC Curve", col = "purple", add=TRUE)
# Add AUC value to the plot
# Add a legend
legend("bottomright", legend = c("Curve 1", "Curve 2", "Curve 3", "Curve 4"),
col = c("blue", "red", "green", "purple"), lty = 1, lwd = 2)
text(0.8, 0.2, paste("AUC =", round(auc(roc_curve_random), 2)), adj = 0)
roc_curve <- roc(test$Work_Earn_Money_1, prob_positive_class)
# Plot ROC curve
plot(roc_curve, main = "ROC Curve", col = "blue")
# Add AUC value to the plot
text(0.8, 0.2, paste("AUC =", round(auc(roc_curve), 2)), adj = 0)Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Variable Importance
# Plot variable importance
varImpPlot(rf, main = "Variable Importance Plot", cex = 0.6, pch = 19)XG Boost
library(xgboost)
library(caTools)
library(dplyr)
library(caret)
set.seed(42)
train_idx <- createDataPartition(df1$Work_Earn_Money_1, p = 0.8, list = FALSE)
train <- df1[train_idx,]
test <- df1[-train_idx,]
# Split the dataset into training and testing sets
X_train <- df1[train_idx, -which(names(df1) == "Work_Earn_Money_1")] # Exclude the target variable from the training set
y_train <- df1[train_idx, "Work_Earn_Money_1"] # Extract the target variable for the training set
X_test <- df1[-train_idx, -which(names(df1) == "Work_Earn_Money_1")] # Exclude the target variable from the testing set
y_test <- df1[-train_idx, "Work_Earn_Money_1"] # Extract the target variable for the testing set
xgb_train <- xgb.DMatrix(data = as.matrix(X_train), label = y_train)
xgb_test <- xgb.DMatrix(data = as.matrix(X_test), label = y_test)
xgb_params <- list(
booster = "gbtree",
eta = 0.01,
max_depth = 2,
gamma = 4,
subsample = 0.75,
colsample_bytree = 1,
objective = "binary:logistic",
eval_metric = "mlogloss",
num_class = length(levels(df1$D1A_1V1L))
)
xgb_model <- xgb.train(
params = xgb_params,
data = xgb_train,
nrounds = 5000,
verbose = 1
)
xgb_model##### xgb.Booster
raw: 4.2 Mb
call:
xgb.train(params = xgb_params, data = xgb_train, nrounds = 5000,
verbose = 1)
params (as set within xgb.train):
booster = "gbtree", eta = "0.01", max_depth = "2", gamma = "4", subsample = "0.75", colsample_bytree = "1", objective = "binary:logistic", eval_metric = "mlogloss", num_class = "0", validate_parameters = "TRUE"
xgb.attributes:
niter
callbacks:
cb.print.evaluation(period = print_every_n)
# of features: 58
niter: 5000
nfeatures : 58
# Make predictions on the test set
predictions <- predict(xgb_model, xgb_test)
# Convert probabilities to class labels, assign greater than 0.5 to Positive
pred_classes <- ifelse(predictions > 0.5, "Positive", "Negative")
# Create the confusion matrix
cm <- table(Actual = test$Work_Earn_Money_1, Predicted = pred_classes)
# Confusion Matrix
print(cm) Predicted
Actual Negative Positive
0 58 16
1 7 319
# Calculate True Positives (TP), True Negatives (TN), False Positives (FP), False Negatives (FN)
TP <- 319
TN <- 58
FP <- 16
FN <- 7
# Calculate accuracy
accuracy <- (TP + TN) / sum(cm)
# Calculate precision
precision <- TP / (TP + FP)
# Calculate recall (also called sensitivity)
recall <- TP / (TP + FN)
# Calculate F1 score
F1 <- 2 * (precision * recall) / (precision + recall)
#Specificity
# Calculate specificity
specificity <- (TN / (TN + FP))
# Print the metrics
cat("Accuracy:", accuracy, "\n")
cat("Precision:", precision, "\n")
cat("Recall:", recall, "\n")
cat("F1 Score:", F1, "\n")
cat("Specificity:", specificity)Accuracy: 0.9425
Precision: 0.9522388
Recall: 0.9785276
F1 Score: 0.9652042
Specificity: 0.7837838
library(pROC)
pred_probs <- predict(xgb_model, xgb_test)
# Compute ROC curve
roc_curve_xg <- roc(y_test, pred_probs)
auc_xg <- round(auc(roc_curve_xg), 2)
# Add AUC
auc_lr <- paste("Logistic Regression,", auc_lr)
auc_lasso <- paste("Lasso AUC,", auc_lasso)
auc_elastic <- paste("Elastic AUC,", auc_elastic)
auc_decision <- paste("Decision Tree with Hyperparameter \nTuning,", auc_decision)
auc_random <- paste("Random Forest,", auc_random)
auc_xg <- paste("XG Boost AUC,", auc_xg)
# Including Prof Nakul's comment to adjust x-axis correctly
#ggroc(roc_curve_lr, legacy.axes = T)
# Plot ROC curve
plot(roc_curve_lr, legacy.axes = T, main = "Comparison of ROC Curves", col = "#597fd2")
plot(roc_curve_lasso, legacy.axes = T, main = "Comparison of ROC Curves", col = "#ec5f4c", lwd = 2, add=TRUE)
plot(roc_curve_elastic, legacy.axes = T, main = "Comparison of ROC Curves", col = "#ffc929", add=TRUE)
plot(roc_curve_random, legacy.axes = T, main = "Comparison of ROC Curves", col = "#006f3c", add=TRUE)
plot(roc_curve_xg, legacy.axes = T, main = "Comparison of ROC Curves", col = "#ff0090", add=TRUE)
plot(roc_curve_decision, legacy.axes = T, main = "Comparison of ROC Curves", col = "#6a4477", add=TRUE)
# Add Legend
legend("bottomright", title="AUC Values", legend = c(auc_lr, auc_lasso, auc_elastic, auc_random, auc_xg, auc_decision), col = c("#597fd2", "#ec5f4c", "#ffc929", "#006f3c", "#ff0090", "#7d1189"), lty = 1, lwd = 2, bg = rgb(1, 1, 1, alpha = 0.7))Setting levels: control = 0, case = 1
Setting direction: controls < cases
XG Boost Tree Plot
# plot the first tree
tree_plot <- xgb.plot.tree(model = xgb_model, trees = 3)
print(tree_plot)library(xgboost)
# Make sure your environment can display graphics
# Assuming 'xgb_model' is already trained
xgb.plot.tree(model = xgb_model, trees = 3)# create plot object of XGBoost tree
tree_plot <- xgb.plot.tree(model = xgb_model, trees = 3, plot_width = 1000,
plot_height = 1000, render = FALSE)
# export plot object to file
export_graph(tree_plot, "xgboost_tree_plot.pdf", width = 1000, height = 1000)
roc_curve <- roc(y_test, pred_probs)
# Plot ROC curve
plot(roc_curve, main = "ROC Curve", col = "blue")
auc_value <- auc(roc_curve)
cat(sprintf("AUC: %f", auc_value))Setting levels: control = 0, case = 1
Setting direction: controls < cases
AUC: 0.981471
Variable Importance for XGBoost
# Get variable importance
importance <- xgb.importance(model = xgb_model)
# Print the variable importance
print(importance) Feature Gain Cover Frequency
1: Paid_in_Taka 0.5612330288 0.2414881768 0.1472964574
2: Wage_Last_Head 0.0609914469 0.0679072500 0.1202610317
3: Monthly_Remittances 0.0529529219 0.1133333039 0.0733374767
4: Duration_of_stay 0.0495385629 0.1159481241 0.0978868863
5: Livelihood_Occupation_17 0.0446480903 0.0746981992 0.0540708515
6: Age_First_Marriage 0.0415233500 0.0804492824 0.0876320696
7: Wage_First_Head 0.0350530518 0.0580184588 0.0798632691
8: Rent_per_Month 0.0304779481 0.0504952772 0.0649471722
9: Monthly_Savings 0.0177077918 0.0226183753 0.0385332505
10: Food_budget 0.0164503521 0.0300356659 0.0413300186
11: Number_Trips 0.0127972706 0.0209860825 0.0267246737
12: Education_Level_8 0.0107921067 0.0229505020 0.0248601616
13: Saving_brought_Home 0.0102594648 0.0157722135 0.0208203853
14: Can_write_letter_2 0.0097427315 0.0184533893 0.0236171535
15: Education_Level_2 0.0096760378 0.0061490748 0.0155376010
16: Livelihood_Occupation_9 0.0078442205 0.0176617918 0.0180236172
17: Livelihood_Occupation_12 0.0058585761 0.0090102716 0.0105655687
18: Education_Level_7 0.0053102806 0.0126347656 0.0146053449
19: Livelihood_Occupation_2 0.0036721257 0.0040203959 0.0083903045
20: Livelihood_Occupation_13 0.0027524912 0.0014878333 0.0055935364
21: Education_Level_4 0.0025741005 0.0058271179 0.0068365444
22: Month_Arrival_98 0.0024378208 0.0027718135 0.0059042884
23: Livelihood_Occupation_11 0.0020747526 0.0023175921 0.0046612803
24: Livelihood_Occupation_7 0.0013225912 0.0031107330 0.0034182722
25: Month_Arrival_2 0.0012421704 0.0008629921 0.0027967682
26: Education_Level_5 0.0006852165 0.0004356680 0.0015537601
27: Education_Level_3 0.0003814974 0.0005556492 0.0009322561
Feature Gain Cover Frequency